Example #1
    def clonescan(self, id):
        sf = SpiderFoot(self.config)
        dbh = SpiderFootDb(self.config)
        types = dbh.eventTypes()
        info = dbh.scanInstanceGet(id)
        scanconfig = dbh.scanConfigGet(id)
        scanname = info[0]
        scantarget = info[1]
        targetType = None

        if scanname == "" or scantarget == "" or len(scanconfig) == 0:
            return self.error("Something went wrong internally.")

        targetType = sf.targetType(scantarget)
        if targetType == None:
            # It must be a name, so wrap quotes around it
            scantarget = "\"" + scantarget + "\""

        modlist = scanconfig['_modulesenabled'].split(',')

        templ = Template(filename='dyn/newscan.tmpl', lookup=self.lookup)
        return templ.render(pageid='NEWSCAN', types=types, docroot=self.docroot,
                            modules=self.config['__modules__'], selectedmods=modlist,
                            scanname=unicode(scanname, 'utf-8', errors='replace'), 
                            scantarget=unicode(scantarget, 'utf-8', errors='replace'))
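The clonescan handler above leans on sf.targetType() to decide whether the stored target looks like an IP address, netblock or internet name, and wraps anything unrecognised in quotes so it is treated as a human name. As a rough, standalone illustration of that kind of dispatch (not SpiderFoot's actual implementation; the category names are only placeholders), using just the standard library:

import ipaddress
import re

def guess_target_type(target):
    # Quoted values are treated as free-form (human) names.
    if target.startswith('"') and target.endswith('"'):
        return "HUMAN_NAME"
    # Plain IP address?
    try:
        ipaddress.ip_address(target)
        return "IP_ADDRESS"
    except ValueError:
        pass
    # IP subnet?
    try:
        ipaddress.ip_network(target, strict=False)
        return "NETBLOCK_OWNER"
    except ValueError:
        pass
    # Very rough hostname/domain check.
    if re.match(r"^[a-z0-9.-]+\.[a-z]{2,}$", target, re.IGNORECASE):
        return "INTERNET_NAME"
    return None  # caller then wraps the value in quotes, as clonescan does

print(guess_target_type("spiderfoot.net"))  # INTERNET_NAME
print(guess_target_type("not a host"))      # None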
Example #2
    def __init__(self, opts):
        global sf

        # connect() will create the database file if it doesn't exist, but
        # at least we can use this opportunity to ensure we have permissions to
        # read and write to such a file.
        dbh = sqlite3.connect(opts['__database'], timeout=10)
        if dbh == None:
            sf.fatal("Could not connect to internal database, and couldn't create " + \
                opts['__database'])
        dbh.text_factory = str

        self.conn = dbh

        self.dbh = dbh.cursor()
        sf = SpiderFoot(opts)

        # Now we actually check to ensure the database file has the schema set
        # up correctly.
        try:
            self.dbh.execute('SELECT COUNT(*) FROM tbl_scan_config')
        except sqlite3.Error:
            # .. If not set up, we set it up.
            try:
                self.create()
            except BaseException as e:
                sf.error("Tried to set up the SpiderFoot database schema, but failed: " + \
                    e.args[0])
        return
Example #3
    def savesettingsraw(self, allopts, token):
        if str(token) != str(self.token):
            return json.dumps(["ERROR", "Invalid token (" + str(self.token) + ")."])

        try:
            dbh = SpiderFootDb(self.config)
            # Reset config to default
            if allopts == "RESET":
                dbh.configClear()  # Clear it in the DB
                self.config = deepcopy(self.defaultConfig)  # Clear in memory
            else:
                useropts = json.loads(allopts)
                cleanopts = dict()
                for opt in useropts.keys():
                    cleanopts[opt] = self.cleanUserInput([useropts[opt]])[0]

                currentopts = deepcopy(self.config)

                # Make a new config where the user options override
                # the current system config.
                sf = SpiderFoot(self.config)
                self.config = sf.configUnserialize(cleanopts, currentopts)
                dbh.configSet(sf.configSerialize(currentopts))
        except Exception as e:
            return json.dumps(["ERROR", "Processing one or more of your inputs failed: " + str(e)])

        return json.dumps(["SUCCESS", ""])
Example #4
    def __init__(self, opts):
        global sf

        # connect() will create the database file if it doesn't exist, but
        # at least we can use this opportunity to ensure we have permissions to
        # read and write to such a file.
        dbh = sqlite3.connect(opts['__database'], timeout=10)
        if dbh == None:
            sf.error("Could not connect to internal database. Check that " + \
                opts['__database'] + " exists and is readable and writable.")
        dbh.text_factory = str

        self.conn = dbh

        self.dbh = dbh.cursor()
        sf = SpiderFoot(opts)

        # Now we actually check to ensure the database file has the schema set
        # up correctly.
        try:
            self.dbh.execute('SELECT COUNT(*) FROM tbl_scan_config')
        except sqlite3.Error:
            sf.error("Found spiderfoot.db but it doesn't appear to be in " \
                "the expected state - ensure the schema is created.")

        return
    def savesettings(self, allopts, token):
        if str(token) != str(self.token):
            return self.error("Invalid token (" + str(self.token) + ").")

        try:
            dbh = SpiderFootDb(self.config)
            # Reset config to default
            if allopts == "RESET":
                dbh.configClear()  # Clear it in the DB
                self.config = deepcopy(self.defaultConfig)  # Clear in memory
            else:
                useropts = json.loads(allopts)
                cleanopts = dict()
                for opt in useropts.keys():
                    cleanopts[opt] = self.cleanUserInput([useropts[opt]])[0]

                currentopts = deepcopy(self.config)

                # Make a new config where the user options override
                # the current system config.
                sf = SpiderFoot(self.config)
                self.config = sf.configUnserialize(cleanopts, currentopts)
                dbh.configSet(sf.configSerialize(currentopts))
        except Exception as e:
            return self.error("Processing one or more of your inputs failed: " + str(e))

        templ = Template(filename='dyn/opts.tmpl', lookup=self.lookup)
        self.token = random.randint(0, 99999999)
        return templ.render(opts=self.config, pageid='SETTINGS', updated=True,
                            docroot=self.docroot, token=self.token)
Example #6
    def __init__(self, config):
        self.defaultConfig = deepcopy(config)
        dbh = SpiderFootDb(config)
        # 'config' supplied will be the defaults, let's supplement them
        # now with any configuration which may have previously been
        # saved.
        sf = SpiderFoot(config)
        self.config = sf.configUnserialize(dbh.configGet(), config)
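This constructor layers whatever configuration was previously saved in the database on top of the supplied defaults via configUnserialize(). A minimal standalone sketch of that merge, assuming flat dictionaries of option values:

from copy import deepcopy

def merge_saved_config(defaults, saved):
    # Keep the defaults intact and let previously saved values override them.
    merged = deepcopy(defaults)
    merged.update(saved)
    return merged

defaults = {"_fetchtimeout": 5, "_useragent": "SpiderFoot"}
saved = {"_fetchtimeout": 15}
print(merge_saved_config(defaults, saved))
# {'_fetchtimeout': 15, '_useragent': 'SpiderFoot'}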
Example #7
    def scanelementtypediscovery(self, id, eventType):
        keepGoing = True
        sf = SpiderFoot(self.config)
        dbh = SpiderFootDb(self.config)
        pc = dict()
        datamap = dict()

        # Get the events we will be tracing back from
        leafSet = dbh.scanResultEvent(id, eventType)

        # Get the first round of source IDs for the leafs
        nextIds = list()
        for row in leafSet:
            # these must be unique values!
            parentId = row[9]
            childId = row[8]
            datamap[childId] = row

            if pc.has_key(parentId):
                if childId not in pc[parentId]:
                    pc[parentId].append(childId)
            else:
                pc[parentId] = [ childId ]

            # parents of the leaf set
            if parentId not in nextIds:
                nextIds.append(parentId)

        while keepGoing:
            parentSet = dbh.scanElementSources(id, nextIds)
            nextIds = list()
            keepGoing = False

            for row in parentSet:
                parentId = row[9]
                childId = row[8]
                datamap[childId] = row
                #print childId + " = " + str(row)

                if pc.has_key(parentId):
                    if childId not in pc[parentId]:
                        pc[parentId].append(childId)
                else:
                    pc[parentId] = [ childId ]
                if parentId not in nextIds:
                    nextIds.append(parentId)

                # Prevent us from looping at root
                if parentId != "ROOT":
                    keepGoing = True

        datamap[parentId] = row

        #print pc
        retdata = dict()
        retdata['tree'] = sf.dataParentChildToTree(pc)
        retdata['data'] = datamap
        return json.dumps(retdata, ensure_ascii=False)
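Both variants of scanelementtypediscovery finish by handing a parent-to-children mapping to sf.dataParentChildToTree() for visualisation. A standalone sketch of turning such a mapping into a nested tree (illustrative only, not SpiderFoot's implementation), assuming 'ROOT' is the top-level key and the mapping is acyclic:

def parent_child_to_tree(pc, root="ROOT"):
    # pc maps a parent id to a list of child ids; build a nested dict from the root down.
    def build(node):
        return {child: build(child) for child in pc.get(node, [])}
    return {root: build(root)}

pc = {"ROOT": ["evt1"], "evt1": ["evt2", "evt3"]}
print(parent_child_to_tree(pc))
# {'ROOT': {'evt1': {'evt2': {}, 'evt3': {}}}}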
Example #8
    def startscan(self, scanname, scantarget, modulelist, typelist):
        modopts = dict() # Not used yet as module options are set globally
        modlist = list()
        sf = SpiderFoot(self.config)
        dbh = SpiderFootDb(self.config)
        types = dbh.eventTypes()

        [scanname, scantarget] = self.cleanUserInput([scanname, scantarget])

        if scanname == "" or scantarget == "":
            return self.error("Form incomplete.")

        if typelist == "" and modulelist == "":
            return self.error("Form incomplete.")

        if modulelist != "":
            modlist = modulelist.replace('module_', '').split(',')
        else:
            typesx = typelist.replace('type_', '').split(',')
            # 1. Find all modules that produce the requested types
            modlist = sf.modulesProducing(typesx)
            newmods = deepcopy(modlist)
            newmodcpy = deepcopy(newmods)
            # 2. For each type those modules consume, get modules producing
            while len(newmodcpy) > 0:
                for etype in sf.eventsToModules(newmodcpy):
                    xmods = sf.modulesProducing([etype])
                    for mod in xmods:
                        if mod not in modlist:
                            modlist.append(mod)
                            newmods.append(mod)
                newmodcpy = deepcopy(newmods)
                newmods = list()

        # Add our mandatory storage module..
        if "sfp__stor_db" not in modlist:
            modlist.append("sfp__stor_db")
        modlist.sort()

        # For now we don't permit multiple simultaneous scans
        for thread in threading.enumerate():
            if thread.name.startswith("SF_"):
                templ = Template(filename='dyn/newscan.tmpl', lookup=self.lookup)
                return templ.render(modules=self.config['__modules__'], 
                    alreadyRunning=True, runningScan=thread.name[3:], 
                    types=types, pageid="NEWSCAN")

        # Start running a new scan
        self.scanner = SpiderFootScanner(scanname, scantarget.lower(), modlist, 
            self.config, modopts)
        t = threading.Thread(name="SF_" + scanname, target=self.scanner.startScan)
        t.start()

        templ = Template(filename='dyn/scaninfo.tmpl', lookup=self.lookup)
        return templ.render(id=self.scanner.myId, name=scanname, 
            status=self.scanner.status, pageid="SCANLIST")
    def scanviz(self, id, gexf="0"):
        types = list()
        dbh = SpiderFootDb(self.config)
        sf = SpiderFoot(self.config)
        data = dbh.scanResultEvent(id, filterFp=True)
        scan = dbh.scanInstanceGet(id)
        root = scan[1]
        if gexf != "0":
            cherrypy.response.headers['Content-Disposition'] = "attachment; filename=SpiderFoot.gexf"
            cherrypy.response.headers['Content-Type'] = "application/gexf"
            cherrypy.response.headers['Pragma'] = "no-cache"
            return sf.buildGraphGexf([root], "SpiderFoot Export", data)
        else:
            return sf.buildGraphJson([root], data)
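When only event types are selected, startscan above expands the module list transitively: every module producing a requested type is added, then every module producing whatever the new modules consume, until nothing new appears. A self-contained sketch of the same fixed-point expansion over made-up producer/consumer tables:

# Hypothetical lookup tables: which modules produce / consume which event types.
PRODUCES = {
    "sfp_dns": ["IP_ADDRESS"],
    "sfp_whois": ["DOMAIN_NAME"],
    "sfp_portscan": ["TCP_PORT_OPEN"],
}
CONSUMES = {
    "sfp_dns": ["DOMAIN_NAME"],
    "sfp_whois": ["DOMAIN_NAME"],
    "sfp_portscan": ["IP_ADDRESS"],
}

def modules_for_types(wanted_types):
    # Seed with every module that directly produces a requested type.
    modlist = [m for m, types in PRODUCES.items() if set(types) & set(wanted_types)]
    new = list(modlist)
    while new:
        # Event types the newly added modules consume...
        needed = {t for m in new for t in CONSUMES.get(m, [])}
        # ...and any module producing those types that we don't have yet.
        new = [m for m, types in PRODUCES.items()
               if set(types) & needed and m not in modlist]
        modlist.extend(new)
    return sorted(modlist)

print(modules_for_types(["TCP_PORT_OPEN"]))
# ['sfp_dns', 'sfp_portscan', 'sfp_whois']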
Example #10
    def optsexport(self, pattern):
        sf = SpiderFoot(self.config)
        conf = sf.configSerialize(self.config)
        content = ""
        for opt in sorted(conf):
            if ":_" in opt or opt.startswith("_"):
                continue
            if not pattern:
                content += opt + "=" + str(conf[opt]) + "\n"
            else:
                if pattern in opt:
                    content += opt + "=" + str(conf[opt]) + "\n"
        cherrypy.response.headers['Content-Disposition'] = 'attachment; filename="SpiderFoot.cfg"'
        cherrypy.response.headers['Content-Type'] = "text/plain"
        return content
    def scanelementtypediscovery(self, id, eventType):
        sf = SpiderFoot(self.config)
        dbh = SpiderFootDb(self.config)
        pc = dict()
        datamap = dict()

        # Get the events we will be tracing back from
        leafSet = dbh.scanResultEvent(id, eventType)
        [datamap, pc] = dbh.scanElementSourcesAll(id, leafSet)

        # Delete the ROOT key as it adds no value from a viz perspective
        del pc['ROOT']
        retdata = dict()
        retdata['tree'] = sf.dataParentChildToTree(pc)
        retdata['data'] = datamap

        return json.dumps(retdata, ensure_ascii=False)
Example #12
    def __init__(self, name, target, moduleList, globalOpts, moduleOpts):
        self.config = deepcopy(globalOpts)
        self.sf = SpiderFoot(self.config)
        self.target = target
        self.moduleList = moduleList
        self.name = name

        return
Example #13
    def savesettings(self, allopts, token, configFile=None):
        if str(token) != str(self.token):
            return self.error("Invalid token (" + str(self.token) + ").")

        if configFile:  # configFile seems to get set even if a file isn't uploaded
            if configFile.file:
                contents = configFile.file.read()
                try:
                    tmp = dict()
                    for line in contents.split("\n"):
                        if "=" not in line:
                            continue
                        l = line.strip().split("=")
                        if len(l) == 1:
                            l[1] = ""
                        tmp[l[0]] = l[1]
                    allopts = json.dumps(tmp)
                except BaseException as e:
                    return self.error("Failed to parse input file. Was it generated from SpiderFoot? (" + str(e) + ")")

        try:
            dbh = SpiderFootDb(self.config)
            # Reset config to default
            if allopts == "RESET":
                dbh.configClear()  # Clear it in the DB
                self.config = deepcopy(self.defaultConfig)  # Clear in memory
            else:
                useropts = json.loads(allopts)
                cleanopts = dict()
                for opt in useropts.keys():
                    cleanopts[opt] = self.cleanUserInput([useropts[opt]])[0]

                currentopts = deepcopy(self.config)

                # Make a new config where the user options override
                # the current system config.
                sf = SpiderFoot(self.config)
                self.config = sf.configUnserialize(cleanopts, currentopts)
                dbh.configSet(sf.configSerialize(self.config))
        except Exception as e:
            return self.error("Processing one or more of your inputs failed: " + str(e))

        templ = Template(filename='dyn/opts.tmpl', lookup=self.lookup)
        self.token = random.randint(0, 99999999)
        return templ.render(opts=self.config, pageid='SETTINGS', updated=True,
                            docroot=self.docroot, token=self.token)
    def scanvizmulti(self, ids, gexf="1"):
        types = list()
        dbh = SpiderFootDb(self.config)
        sf = SpiderFoot(self.config)
        data = list()
        roots = list()
        for id in ids.split(','):
            data = data + dbh.scanResultEvent(id, filterFp=True)
            roots.append(dbh.scanInstanceGet(id)[1])

        if gexf != "0":
            cherrypy.response.headers['Content-Disposition'] = "attachment; filename=SpiderFoot.gexf"
            cherrypy.response.headers['Content-Type'] = "application/gexf"
            cherrypy.response.headers['Pragma'] = "no-cache"
            return sf.buildGraphGexf(roots, "SpiderFoot Export", data)
        else:
            # Not implemented yet
            return None
Example #15
    def rerunscan(self, id):
        # Snapshot the current configuration to be used by the scan
        cfg = deepcopy(self.config)
        modopts = dict() # Not used yet as module options are set globally
        modlist = list()
        sf = SpiderFoot(cfg)
        dbh = SpiderFootDb(cfg)
        info = dbh.scanInstanceGet(id)
        scanconfig = dbh.scanConfigGet(id)
        scanname = info[0]
        scantarget = info[1]
        targetType = None

        if len(scanconfig) == 0:
            return self.error("Something went wrong internally.")

        modlist = scanconfig['_modulesenabled'].split(',')

        targetType = sf.targetType(scantarget)
        if targetType == None:
            # It must then be a name, as a re-run scan should always have a clean
            # target.
            targetType = "HUMAN_NAME"

        if targetType != "HUMAN_NAME":
            scantarget = scantarget.lower()

        # Start running a new scan
        newId = sf.genScanInstanceGUID(scanname)
        t = SpiderFootScanner(scanname, scantarget, targetType, newId,
            modlist, cfg, modopts)
        t.start()

        # Wait until the scan has initialized
        while globalScanStatus.getStatus(newId) == None:
            print "[info] Waiting for the scan to initialize..."
            time.sleep(1)

        templ = Template(filename='dyn/scaninfo.tmpl', lookup=self.lookup)
        return templ.render(id=newId, name=unicode(scanname, 'utf-8', errors='replace'), docroot=self.docroot,
            status=globalScanStatus.getStatus(newId), pageid="SCANLIST")
Example #16
    def rerunscanmulti(self, ids):
        # Snapshot the current configuration to be used by the scan
        cfg = deepcopy(self.config)
        modopts = dict() # Not used yet as module options are set globally
        modlist = list()
        sf = SpiderFoot(cfg)
        dbh = SpiderFootDb(cfg)

        for id in ids.split(","):
            info = dbh.scanInstanceGet(id)
            scanconfig = dbh.scanConfigGet(id)
            scanname = info[0]
            scantarget = info[1]
            targetType = None

            if len(scanconfig) == 0:
                return self.error("Something went wrong internally.")

            modlist = scanconfig['_modulesenabled'].split(',')

            targetType = sf.targetType(scantarget)
            if targetType == None:
                # Should never be triggered for a re-run scan..
                return self.error("Invalid target type. Could not recognize it as " + \
                                  "a human name, IP address, IP subnet, ASN, domain name or host name.")

            # Start running a new scan
            newId = sf.genScanInstanceGUID(scanname)
            t = SpiderFootScanner(unicode(scanname, 'utf-8', errors='replace'), 
                                  unicode(scantarget, 'utf-8', errors='replace').lower(), 
                                  targetType, newId, modlist, cfg, modopts)
            t.start()

            # Wait until the scan has initialized
            while globalScanStatus.getStatus(newId) == None:
                print "[info] Waiting for the scan to initialize..."
                time.sleep(1)

        templ = Template(filename='dyn/scanlist.tmpl', lookup=self.lookup)
        return templ.render(rerunscans=True, docroot=self.docroot, pageid="SCANLIST")
Example #17
    def __init__(self, config):
        self.defaultConfig = deepcopy(config)
        dbh = SpiderFootDb(config)
        # 'config' supplied will be the defaults, let's supplement them
        # now with any configuration which may have previously been
        # saved.
        sf = SpiderFoot(config)
        self.config = sf.configUnserialize(dbh.configGet(), config)

        if self.config['__webaddr'] == "0.0.0.0":
            addr = "<IP of this host>"
        else:
            addr = self.config['__webaddr']

        print ""
        print ""
        print "*************************************************************"
        print " Use SpiderFoot by starting your web browser of choice and "
        print " browse to http://" + addr + ":" + str(self.config['__webport'])
        print "*************************************************************"
        print ""
        print ""
Example #18
    def savesettings(self, allopts):
        try:
            dbh = SpiderFootDb(self.config)
            # Reset config to default
            if allopts == "RESET":
                dbh.configClear() # Clear it in the DB
                self.config = deepcopy(self.defaultConfig) # Clear in memory
            else:
                useropts = json.loads(allopts)
                currentopts = deepcopy(self.config)

                # Make a new config where the user options override
                # the current system config.
                sf = SpiderFoot(self.config)
                self.config = sf.configUnserialize(useropts, currentopts)

                dbh.configSet(sf.configSerialize(currentopts))
        except Exception as e:
            return self.error("Processing one or more of your inputs failed: " + str(e))

        templ = Template(filename='dyn/opts.tmpl', lookup=self.lookup)
        return templ.render(opts=self.config, pageid='SETTINGS', updated=True)
Example #19
    def __init__(self, name, target, moduleList, globalOpts, moduleOpts):
        self.config = deepcopy(globalOpts)
        self.sf = SpiderFoot(self.config)
        self.target = target
        self.moduleList = moduleList
        self.name = name

        # Override the default DNS server
        if self.config['_dnsserver'] != "":
            resolver = dns.resolver.Resolver()
            resolver.nameservers = [ self.config['_dnsserver'] ]
            dns.resolver.override_system_resolver(resolver)
        else:
            dns.resolver.restore_system_resolver()

        return
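The DNS override in this constructor uses dnspython's Resolver with explicit nameservers, installed in place of the system resolver. A minimal sketch of querying through such a resolver directly, assuming dnspython 2.x (older releases use query() instead of resolve()):

import dns.resolver

def lookup_a_records(hostname, nameserver="8.8.8.8"):
    resolver = dns.resolver.Resolver()
    resolver.nameservers = [nameserver]  # use only the chosen DNS server
    try:
        answer = resolver.resolve(hostname, "A")  # dnspython >= 2.0
    except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):
        return []
    return [rr.to_text() for rr in answer]

# lookup_a_records("spiderfoot.net") -> list of IPv4 addresses as strings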
Example #20
    def __init__(self, opts, init=False):
        self.sf = SpiderFoot(opts)

        # connect() will create the database file if it doesn't exist, but
        # at least we can use this opportunity to ensure we have permissions to
        # read and write to such a file.
        dbh = sqlite3.connect(self.sf.myPath() + "/" + opts['__database'], timeout=10)
        if dbh is None:
            self.sf.fatal("Could not connect to internal database, and couldn't create " + opts['__database'])
        dbh.text_factory = str

        self.conn = dbh
        self.dbh = dbh.cursor()

        # Now we actually check to ensure the database file has the schema set
        # up correctly.
        try:
            self.dbh.execute('SELECT COUNT(*) FROM tbl_scan_config')
            self.conn.create_function("REGEXP", 2, __dbregex__)
        except sqlite3.Error:
            # .. If not set up, we set it up.
            try:
                self.create()
                init = True
            except BaseException as e:
                self.sf.error("Tried to set up the SpiderFoot database schema, but failed: " + e.args[0])
            return

        if init:
            print "Attempting to verify database and update if necessary..."
            for qry in self.createTypeQueries:
                try:
                    self.dbh.execute(qry)
                    self.conn.commit()
                except BaseException as e:
                    continue
            self.conn.commit()
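The database constructors above all follow the same pattern: connect, probe a known table to see whether the schema exists, create it if not, and register a custom REGEXP function so SQL queries can use the REGEXP operator. A compact standalone sketch of that pattern against an in-memory SQLite database (the table definition here is simplified, not the real SpiderFoot schema):

import re
import sqlite3

def regexp(pattern, value):
    # Backing function for SQLite's REGEXP operator.
    return re.search(pattern, value or "") is not None

conn = sqlite3.connect(":memory:")
conn.create_function("REGEXP", 2, regexp)
cur = conn.cursor()

try:
    cur.execute("SELECT COUNT(*) FROM tbl_scan_config")
except sqlite3.Error:
    # Schema not present yet: create it, as SpiderFootDb.create() would.
    cur.execute("CREATE TABLE tbl_scan_config (component TEXT, opt TEXT, val TEXT)")
    conn.commit()

cur.execute("INSERT INTO tbl_scan_config VALUES ('GLOBAL', '_useragent', 'SpiderFoot')")
cur.execute("SELECT opt FROM tbl_scan_config WHERE val REGEXP ?", ("^Spider",))
print(cur.fetchall())  # [('_useragent',)]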
    def test_handleEvent_domain_whois_event_data_not_containing_webframework_string_should_not_create_event(
            self):
        sf = SpiderFoot(self.default_options)

        module = sfp_webframework()
        module.setup(sf, dict())

        target_value = 'spiderfoot.net'
        target_type = 'INTERNET_NAME'
        target = SpiderFootTarget(target_value, target_type)
        module.setTarget(target)

        def new_notifyListeners(self, event):
            raise Exception(f"Raised event {event.eventType}: {event.data}")

        module.notifyListeners = new_notifyListeners.__get__(
            module, sfp_webframework)

        event_type = 'ROOT'
        event_data = 'example data'
        event_module = ''
        source_event = ''
        evt = SpiderFootEvent(event_type, event_data, event_module,
                              source_event)

        event_type = 'TARGET_WEB_CONTENT'
        event_data = 'example data'
        event_module = 'example module'
        source_event = evt
        evt = SpiderFootEvent(event_type, event_data, event_module,
                              source_event)
        evt.actualSource = "https://spiderfoot.net/"

        result = module.handleEvent(evt)

        self.assertIsNone(result)
    def test_handleEvent_event_data_affiliate_internet_name_not_matching_ad_server_should_not_return_event(self):
        sf = SpiderFoot(self.default_options)

        module = sfp_stevenblack_hosts()
        module.setup(sf, dict())

        target_value = 'spiderfoot.net'
        target_type = 'INTERNET_NAME'
        target = SpiderFootTarget(target_value, target_type)
        module.setTarget(target)

        module.opts['_fetchtimeout'] = 15
        module.optdescs['_fetchtimeout'] = ''
        module.opts['_useragent'] = ''
        module.optdescs['_useragent'] = ''

        def new_notifyListeners(self, event):
            raise Exception(f"Raised event {event.eventType}: {event.data}")

        module.notifyListeners = new_notifyListeners.__get__(module, sfp_stevenblack_hosts)

        event_type = 'ROOT'
        event_data = 'example data'
        event_module = ''
        source_event = ''
        evt = SpiderFootEvent(event_type, event_data, event_module, source_event)

        event_type = 'AFFILIATE_INTERNET_NAME'
        event_data = 'no.ads.safe.local'
        event_module = 'example module'
        source_event = evt

        evt = SpiderFootEvent(event_type, event_data, event_module, source_event)
        result = module.handleEvent(evt)

        self.assertIsNone(result)
    def test_handleEvent_no_api_key_should_set_errorState(self):
        """
        Test handleEvent(self, event)
        """
        sf = SpiderFoot(self.default_options)

        module = sfp_etherscan()
        module.setup(sf, dict())

        target_value = 'example target value'
        target_type = 'IP_ADDRESS'
        target = SpiderFootTarget(target_value, target_type)
        module.setTarget(target)

        event_type = 'ROOT'
        event_data = 'example data'
        event_module = ''
        source_event = ''
        evt = SpiderFootEvent(event_type, event_data, event_module, source_event)

        result = module.handleEvent(evt)

        self.assertIsNone(result)
        self.assertTrue(module.errorState)
Example #24
    def scanopts(self, id):
        ret = dict()
        dbh = SpiderFootDb(self.config)
        ret['config'] = dbh.scanConfigGet(id)
        ret['configdesc'] = dict()
        for key in ret['config'].keys():
            if ':' not in key:
                ret['configdesc'][key] = self.config['__globaloptdescs__'][key]
            else:
                [modName, modOpt] = key.split(':')
                if modName not in self.config['__modules__'].keys():
                    continue

                if modOpt not in self.config['__modules__'][modName][
                        'optdescs'].keys():
                    continue

                ret['configdesc'][key] = self.config['__modules__'][modName][
                    'optdescs'][modOpt]

        sf = SpiderFoot(self.config)
        meta = dbh.scanInstanceGet(id)
        if meta[3] != 0:
            started = time.strftime("%Y-%m-%d %H:%M:%S",
                                    time.localtime(meta[3]))
        else:
            started = "Not yet"

        if meta[4] != 0:
            finished = time.strftime("%Y-%m-%d %H:%M:%S",
                                     time.localtime(meta[4]))
        else:
            finished = "Not yet"
        ret['meta'] = [meta[0], meta[1], meta[2], started, finished, meta[5]]

        return json.dumps(ret)
    def test_handleEvent(self):
        """
        Test handleEvent(self, event)
        """
        sf = SpiderFoot(self.default_options)

        module = sfp_venmo()
        module.setup(sf, dict())

        target_value = 'example target value'
        target_type = 'IP_ADDRESS'
        target = SpiderFootTarget(target_value, target_type)
        module.setTarget(target)

        event_type = 'ROOT'
        event_data = 'example data'
        event_module = ''
        source_event = ''
        evt = SpiderFootEvent(event_type, event_data, event_module,
                              source_event)

        result = module.handleEvent(evt)

        self.assertIsNone(result)
Example #26
    def test_fetchUrl_argument_url_invalid_url_should_return_None(self):
        """
        Test fetchUrl(self, url, fatal=False, cookies=None, timeout=30,
                 useragent="SpiderFoot", headers=None, noLog=False,
                 postData=None, dontMangle=False, sizeLimit=None,
                 headOnly=False, verify=False)
        """
        sf = SpiderFoot(self.default_options)

        res = sf.fetchUrl("")
        self.assertEqual(None, res)

        res = sf.fetchUrl("://spiderfoot.net/")
        self.assertEqual(None, res)

        res = sf.fetchUrl("file:///etc/hosts")
        self.assertEqual(None, res)

        res = sf.fetchUrl("irc://spiderfoot.net:6697/")
        self.assertEqual(None, res)
    def test_valid_email_should_return_a_boolean(self):
        """
        Test validEmail(self, email)
        """
        sf = SpiderFoot(dict())

        valid_email = sf.validEmail(None)
        self.assertIsInstance(valid_email, bool)
        self.assertFalse(valid_email)

        valid_email = sf.validEmail([])
        self.assertIsInstance(valid_email, bool)
        self.assertFalse(valid_email)

        valid_email = sf.validEmail('root@localhost')
        self.assertIsInstance(valid_email, bool)
        self.assertFalse(valid_email)

        valid_email = sf.validEmail('*****@*****.**')
        self.assertIsInstance(valid_email, bool)
        self.assertTrue(valid_email)
    def test_is_domain_invalid_domain_should_return_false(self):
        """
        Test isDomain(self, hostname, tldList)
        """
        sf = SpiderFoot(self.default_options)
        sf.opts['_internettlds'] = self.test_tlds

        invalid_types = [None, "", list(), dict()]
        for invalid_type in invalid_types:
            with self.subTest(invalid_type=invalid_type):
                is_domain = sf.isDomain(invalid_type, sf.opts.get('_internettlds'))
                self.assertIsInstance(is_domain, bool)
                self.assertFalse(is_domain)

        is_domain = sf.isDomain("local", sf.opts.get('_internettlds'))
        self.assertIsInstance(is_domain, bool)
        self.assertFalse(is_domain)

        is_domain = sf.isDomain("spiderfoot.net\n.com", sf.opts.get('_internettlds'))
        self.assertIsInstance(is_domain, bool)
        self.assertFalse(is_domain)
Example #29
    def test_host_domain_should_return_a_string(self):
        """
        Test hostDomain(self, hostname, tldList)
        """
        sf = SpiderFoot(self.default_options)
        sf.opts['_internettlds'] = self.test_tlds

        host_domain = sf.hostDomain('www.spiderfoot.net',
                                    sf.opts.get('_internettlds'))
        self.assertIsInstance(host_domain, str)
        self.assertEqual('spiderfoot.net', host_domain)

        host_domain = sf.hostDomain('spiderfoot.net',
                                    sf.opts.get('_internettlds'))
        self.assertIsInstance(host_domain, str)
        self.assertEqual('spiderfoot.net', host_domain)

        host_domain = sf.hostDomain('abc.www.spiderfoot.net',
                                    sf.opts.get('_internettlds'))
        self.assertIsInstance(host_domain, str)
        self.assertEqual('spiderfoot.net', host_domain)
Example #30
    def test_validPhoneNumber_should_return_a_boolean(self):
        """
        Test validPhoneNumber(self, phone)
        """
        sf = SpiderFoot(dict())

        invalid_types = [None, "", list(), dict(), int()]
        for invalid_type in invalid_types:
            with self.subTest(invalid_type=invalid_type):
                valid_phone = sf.validPhoneNumber(invalid_type)
                self.assertIsInstance(valid_phone, bool)
                self.assertFalse(valid_phone)

        valid_phone = sf.validPhoneNumber('+1234567890')
        self.assertIsInstance(valid_phone, bool)
        self.assertFalse(valid_phone)

        valid_phone = sf.validPhoneNumber('+12345678901234567890')
        self.assertIsInstance(valid_phone, bool)
        self.assertFalse(valid_phone)

        valid_phone = sf.validPhoneNumber('+12345678901')
        self.assertIsInstance(valid_phone, bool)
        self.assertTrue(valid_phone)
    def test_parse_iban_numbers_should_return_a_list(self):
        """
        Test parseIBANNumbers(self, data)
        """
        sf = SpiderFoot(self.default_options)

        invalid_types = [None, "", list(), dict()]
        for invalid_type in invalid_types:
            with self.subTest(invalid_type=invalid_type):
                ibans = sf.parseIBANNumbers(invalid_type)
                self.assertIsInstance(ibans, list)

        # Example IBANS from https://www.iban.com/structure
        ibans = [
            "AL35202111090000000001234567",
            "AD1400080001001234567890",
            "AT483200000012345864",
            "AZ96AZEJ00000000001234567890",
            "BH02CITI00001077181611",
            "BY86AKBB10100000002966000000",
            "BE71096123456769",
            "BA393385804800211234",
            "BR1500000000000010932840814P2",
            "BG18RZBB91550123456789",
            "CR23015108410026012345",
            "HR1723600001101234565",
            "CY21002001950000357001234567",
            "CZ5508000000001234567899",
            "DK9520000123456789",
            "DO22ACAU00000000000123456789",
            "EG800002000156789012345180002",
            "SV43ACAT00000000000000123123",
            "EE471000001020145685",
            "FO9264600123456789",
            "FI1410093000123458",
            "FR7630006000011234567890189",
            "GE60NB0000000123456789",
            "DE75512108001245126199",
            "GI04BARC000001234567890",
            "GR9608100010000001234567890",
            "GL8964710123456789",
            "GT20AGRO00000000001234567890",
            "VA59001123000012345678",
            "HU93116000060000000012345676",
            "IS750001121234563108962099",
            "IQ20CBIQ861800101010500",
            "IE64IRCE92050112345678",
            "IL170108000000012612345",
            "IT60X0542811101000000123456",
            "JO71CBJO0000000000001234567890",
            "KZ563190000012344567",
            "XK051212012345678906",
            "KW81CBKU0000000000001234560101",
            "LV97HABA0012345678910",
            "LB92000700000000123123456123",
            "LI7408806123456789012",
            "LT601010012345678901",
            "LU120010001234567891",
            "MT31MALT01100000000000000000123",
            "MR1300020001010000123456753",
            "MU43BOMM0101123456789101000MUR",
            "MD21EX000000000001234567",
            "MC5810096180790123456789085",
            "ME25505000012345678951",
            "NL02ABNA0123456789",
            "MK07200002785123453",
            "NO8330001234567",
            "PK36SCBL0000001123456702",
            "PS92PALS000000000400123456702",
            "PL10105000997603123456789123",
            "PT50002700000001234567833",
            "QA54QNBA000000000000693123456",
            "RO09BCYP0000001234567890",
            "LC14BOSL123456789012345678901234",
            "SM76P0854009812123456789123",
            "ST23000200000289355710148",
            "SA4420000001234567891234",
            "RS35105008123123123173",
            "SC52BAHL01031234567890123456USD",
            "SK8975000000000012345671",
            "SI56192001234567892",
            "ES7921000813610123456789",
            "SE7280000810340009783242",
            "CH5604835012345678009",
            "TL380010012345678910106",
            "TN5904018104004942712345",
            "TR320010009999901234567890",
            "UA903052992990004149123456789",
            "AE460090000000123456789",
            "GB33BUKB20201555555555",
            "VG21PACG0000000123456789"
        ]
        for iban in ibans:
            with self.subTest(iban=iban):
                parse_ibans = sf.parseIBANNumbers(iban)
                self.assertIsInstance(parse_ibans, list)
                self.assertIn(iban, parse_ibans)

        # Invalid IBANs
        ibans = [
            # Invalid country code
            "ZZ21PACG0000000123456789",
            # Invalid length for country code
            "VG123456789012345",
            # Invalid mod 97 remainder
            "VG21PACG0000000123456111"
        ]
        for iban in ibans:
            with self.subTest(iban=iban):
                parse_ibans = sf.parseIBANNumbers(iban)
                self.assertIsInstance(parse_ibans, list)
                self.assertNotIn(iban, parse_ibans)
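The invalid-IBAN cases above exercise the standard mod-97 check (ISO 7064): move the first four characters to the end, map letters to numbers (A=10 ... Z=35), and the resulting integer must leave remainder 1 when divided by 97. A standalone sketch of that check, independent of SpiderFoot's own parseIBANNumbers():

def iban_checksum_ok(iban):
    iban = iban.replace(" ", "").upper()
    if not iban.isalnum() or len(iban) < 15:
        return False
    # Move the country code and check digits to the end, then expand
    # letters to their numeric values (A=10 ... Z=35).
    rearranged = iban[4:] + iban[:4]
    digits = "".join(str(int(ch, 36)) for ch in rearranged)
    return int(digits) % 97 == 1

print(iban_checksum_ok("GB33BUKB20201555555555"))    # True
print(iban_checksum_ok("VG21PACG0000000123456111"))  # False (bad mod-97 remainder)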
    def test_setup(self):
        sf = SpiderFoot(self.default_options)
        module = sfp_talosintel()
        module.setup(sf, dict())
Example #33
        sfConfig['_debug'] = True
    else:
        sfConfig['_debug'] = False

    if args.q or args.o == "json":
        sfConfig['__logging'] = False

    if args.l:
        (addr, port) = args.l.split(":")
        sfConfig['__webaddr'] = addr
        sfConfig['__webport'] = int(port)
    else:
        sfConfig['__logstdout'] = True

    sfModules = dict()
    sft = SpiderFoot(sfConfig)
    # Go through each module in the modules directory with a .py extension
    for filename in os.listdir(sft.myPath() + '/modules/'):
        if filename.startswith("sfp_") and filename.endswith(".py"):
            # Skip the module template and debugging modules
            if filename == "sfp_template.py" or filename == 'sfp_stor_print.py':
                continue
            modName = filename.split('.')[0]

            # Load and instantiate the module
            sfModules[modName] = dict()
            mod = __import__('modules.' + modName, globals(), locals(),
                             [modName])
            sfModules[modName]['object'] = getattr(mod, modName)()
            sfModules[modName]['name'] = sfModules[modName][
                'object'].__doc__.split(":", 5)[0]
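The discovery loop above derives each module name from its filename, loads it with __import__ and pulls a display name out of the class docstring. A simplified standalone sketch of the same idea using importlib; the directory layout and naming convention are taken from the snippet, and error handling is omitted:

import importlib
import os

def load_sfp_modules(modules_dir="modules"):
    loaded = {}
    for filename in os.listdir(modules_dir):
        if not (filename.startswith("sfp_") and filename.endswith(".py")):
            continue
        if filename in ("sfp_template.py", "sfp_stor_print.py"):
            continue  # skip the template and debugging modules, as above
        mod_name = filename[:-3]
        module = importlib.import_module("modules." + mod_name)
        instance = getattr(module, mod_name)()  # the class shares the module's name
        loaded[mod_name] = {
            "object": instance,
            # The first part of the docstring doubles as the display name.
            "name": (instance.__doc__ or mod_name).split(":", 5)[0],
        }
    return loaded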
    def test_setup(self):
        sf = SpiderFoot(self.default_options)
        module = sfp_openphish()
        module.setup(sf, dict())
    def test_setup(self):
        sf = SpiderFoot(self.default_options)
        module = sfp_dnsresolve()
        module.setup(sf, dict())
Example #36
    def test_setup(self):
        sf = SpiderFoot(self.default_options)
        module = sfp_etherscan()
        module.setup(sf, dict())
Example #37
    def index(self):
        sf = SpiderFoot(self.config)
        # Look for referenced templates in the current directory only
        templ = Template(filename='dyn/setup.tmpl', lookup=self.lookup)
        return templ.render(stage=1, config=self.config, path=os.path.dirname(sf.myPath()))
    def startscan(self, scanname, scantarget, modulelist, typelist):
        global globalScanStatus

        # Snapshot the current configuration to be used by the scan
        cfg = deepcopy(self.config)
        modopts = dict()  # Not used yet as module options are set globally
        modlist = list()
        sf = SpiderFoot(cfg)
        dbh = SpiderFootDb(cfg)
        types = dbh.eventTypes()
        targetType = None
        [scanname, scantarget] = self.cleanUserInput([scanname, scantarget])

        if scanname == "" or scantarget == "":
            return self.error("Form incomplete.")

        if typelist == "" and modulelist == "":
            return self.error("Form incomplete.")

        if modulelist != "":
            modlist = modulelist.replace('module_', '').split(',')
        else:
            typesx = typelist.replace('type_', '').split(',')
            # 1. Find all modules that produce the requested types
            modlist = sf.modulesProducing(typesx)
            newmods = deepcopy(modlist)
            newmodcpy = deepcopy(newmods)
            # 2. For each type those modules consume, get modules producing
            while len(newmodcpy) > 0:
                for etype in sf.eventsToModules(newmodcpy):
                    xmods = sf.modulesProducing([etype])
                    for mod in xmods:
                        if mod not in modlist:
                            modlist.append(mod)
                            newmods.append(mod)
                newmodcpy = deepcopy(newmods)
                newmods = list()

        # Add our mandatory storage module..
        if "sfp__stor_db" not in modlist:
            modlist.append("sfp__stor_db")
        modlist.sort()

        targetType = sf.targetType(scantarget)
        if targetType is None:
            return self.error("Invalid target type. Could not recognize it as " + \
                              "an IP address, IP subnet, domain name or host name.")

        # Start running a new scan
        scanId = sf.genScanInstanceGUID(scanname)
        t = SpiderFootScanner(scanname, scantarget.lower(), targetType, scanId,
                              modlist, cfg, modopts)
        t.start()

        # Wait until the scan has initialized
        while globalScanStatus.getStatus(scanId) is None:
            print "[info] Waiting for the scan to initialize..."
            time.sleep(1)

        templ = Template(filename='dyn/scaninfo.tmpl', lookup=self.lookup)
        return templ.render(id=scanId, name=scanname, docroot=self.docroot,
                            status=globalScanStatus.getStatus(scanId), pageid="SCANLIST")
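startscan hands the actual work to a SpiderFootScanner running in a named thread, then polls the shared globalScanStatus object until the scan reports it has initialised. A stripped-down sketch of that start-and-poll pattern, with a plain dictionary standing in for the status object:

import threading
import time

scan_status = {}  # stand-in for globalScanStatus
status_lock = threading.Lock()

def run_scan(scan_id):
    with status_lock:
        scan_status[scan_id] = "STARTING"
    time.sleep(0.5)  # pretend to set up modules
    with status_lock:
        scan_status[scan_id] = "RUNNING"

scan_id = "demo-scan"
t = threading.Thread(name="SF_" + scan_id, target=run_scan, args=(scan_id,))
t.start()

# Wait until the scan has initialised, as the handler above does.
while scan_status.get(scan_id) is None:
    print("[info] Waiting for the scan to initialize...")
    time.sleep(0.1)

print("scan status:", scan_status[scan_id])
t.join()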
Example #39
class SpiderFootScanner:
    moduleInstances = None
    status = "UNKNOWN"
    myId = None

    def __init__(self, name, target, moduleList, globalOpts, moduleOpts):
        self.config = deepcopy(globalOpts)
        self.sf = SpiderFoot(self.config)
        self.target = target
        self.moduleList = moduleList
        self.name = name

        return

    # Status of the currently running scan (if any)
    def scanStatus(self, id):
        if id != self.myId:
            return "UNKNOWN"
        return self.status  

    # Stop a scan (id variable is unnecessary for now given that only one simultaneous
    # scan is permitted.)
    def stopScan(self, id):
        if id != self.myId:
            return None

        if self.moduleInstances == None:
            return None

        for modName in self.moduleInstances.keys():
            self.moduleInstances[modName].stopScanning()

    # Start running a scan
    def startScan(self):
        self.moduleInstances = dict()
        dbh = SpiderFootDb(self.config)
        self.sf.setDbh(dbh)
        aborted = False

        # Create a unique ID for this scan and create it in the back-end DB.
        self.config['__guid__'] = dbh.scanInstanceGenGUID(self.target)
        self.sf.setScanId(self.config['__guid__'])
        self.myId = self.config['__guid__']
        dbh.scanInstanceCreate(self.config['__guid__'], self.name, self.target)
        dbh.scanInstanceSet(self.config['__guid__'], time.time() * 1000, None, 'STARTING')
        self.status = "STARTING"
        
        # Save the config current set for this scan
        self.config['_modulesenabled'] = self.moduleList
        dbh.scanConfigSet(self.config['__guid__'], self.sf.configSerialize(self.config))

        self.sf.status("Scan [" + self.config['__guid__'] + "] initiated.")
        # moduleList = list of modules the user wants to run
        try:
            for modName in self.moduleList:
                if modName == '':
                    continue

                module = __import__('modules.' + modName, globals(), locals(), [modName])
                mod = getattr(module, modName)()
                mod.__name__ = modName

                # A bit hacky: we pass the database object as part of the config. This
                # object should only be used by the internal SpiderFoot modules writing
                # to the database, which at present is only sfp_stor_db.
                # Individual modules cannot create their own SpiderFootDb instance or
                # we'll get database locking issues, so it all goes through this.
                self.config['__sfdb__'] = dbh

                # Set up the module
                # Configuration is a combined global config with module-specific options
                #modConfig = deepcopy(self.config)
                modConfig = self.config['__modules__'][modName]['opts']
                for opt in self.config.keys():
                    modConfig[opt] = self.config[opt]

                mod.clearListeners() # clear any listener relationships from the past
                mod.setup(self.sf, self.target, modConfig)
                self.moduleInstances[modName] = mod
                self.sf.status(modName + " module loaded.")

            # Register listener modules and then start all modules sequentially
            for module in self.moduleInstances.values():
                for listenerModule in self.moduleInstances.values():
                    # Careful not to register twice or you will get duplicate events
                    if listenerModule in module._listenerModules:
                        continue
                    # Note the absence of a check for whether a module can register
                    # to itself. That is intentional because some modules will
                    # act on their own notifications (e.g. sfp_dns)!
                    if listenerModule.watchedEvents() != None:
                        module.registerListener(listenerModule)

            dbh.scanInstanceSet(self.config['__guid__'], status='RUNNING')
            self.status = "RUNNING"

            # Create the "ROOT" event which un-triggered modules will link events to
            rootEvent = SpiderFootEvent("INITIAL_TARGET", self.target, "SpiderFoot UI")
            dbh.scanEventStore(self.config['__guid__'], rootEvent)

            # Start the modules sequentially.
            for module in self.moduleInstances.values():
                # Check in case the user requested to stop the scan between modules initializing
                if module.checkForStop():
                    dbh.scanInstanceSet(self.config['__guid__'], status='ABORTING')
                    self.status = "ABORTING"
                    aborted = True
                    break
                # Many modules' start() method will return None, as most will rely on 
                # notifications during the scan from other modules.
                module.start()

            # Check if any of the modules ended due to being stopped
            for module in self.moduleInstances.values():
                if module.checkForStop():
                    aborted = True

            if aborted:
                self.sf.status("Scan [" + self.config['__guid__'] + "] aborted.")
                dbh.scanInstanceSet(self.config['__guid__'], None, time.time() * 1000, 'ABORTED')
                self.status = "ABORTED"
            else:
                self.sf.status("Scan [" + self.config['__guid__'] + "] completed.")
                dbh.scanInstanceSet(self.config['__guid__'], None, time.time() * 1000, 'FINISHED')
                self.status = "FINISHED"
        except Exception as e:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self.sf.error("Unhandled exception encountered during scan. " + \
                "Please report this as a bug: " + \
                repr(traceback.format_exception(exc_type, exc_value, exc_traceback)), False)
            self.sf.status("Scan [" + self.config['__guid__'] + "] failed: " + str(e))
            dbh.scanInstanceSet(self.config['__guid__'], None, time.time() * 1000, 'ERROR-FAILED')
            self.status = "ERROR-FAILED"

        self.moduleInstances = None
        dbh.close()
        self.sf.setDbh(None)
        self.sf.setScanId(None)
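The listener registration step in startScan() is essentially the observer pattern: every module is registered as a listener on every other module (and on itself), and events are only delivered to listeners whose watchedEvents() include that event type. A minimal self-contained sketch of that wiring, independent of the real SpiderFootPlugin API:

class MiniModule:
    def __init__(self, name, watched):
        self.name = name
        self._watched = set(watched)
        self._listeners = []

    def watchedEvents(self):
        return self._watched

    def registerListener(self, listener):
        if listener not in self._listeners:  # avoid duplicate notifications
            self._listeners.append(listener)

    def notifyListeners(self, event_type, data):
        for listener in self._listeners:
            if event_type in listener.watchedEvents():
                listener.handleEvent(self.name, event_type, data)

    def handleEvent(self, source, event_type, data):
        print(f"{self.name} got {event_type} from {source}: {data}")

dns_mod = MiniModule("sfp_dns", ["INTERNET_NAME"])
stor_mod = MiniModule("sfp__stor_db", ["INTERNET_NAME", "IP_ADDRESS"])

# Register every module on every other module (and itself), as startScan() does.
for module in (dns_mod, stor_mod):
    for listener in (dns_mod, stor_mod):
        module.registerListener(listener)

dns_mod.notifyListeners("IP_ADDRESS", "198.51.100.7")  # only sfp__stor_db reacts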
Example #40
class SpiderFootDb:
    sf = None
    dbh = None
    conn = None

    # Queries for creating the SpiderFoot database
    createQueries = [
            "PRAGMA journal_mode=WAL",
            "CREATE TABLE tbl_event_types ( \
                event       VARCHAR NOT NULL PRIMARY KEY, \
                event_descr VARCHAR NOT NULL, \
                event_raw   INT NOT NULL DEFAULT 0 \
            )",
            "CREATE TABLE tbl_config ( \
                scope   VARCHAR NOT NULL, \
                opt     VARCHAR NOT NULL, \
                val     VARCHAR NOT NULL, \
                PRIMARY KEY (scope, opt) \
            )",
            "CREATE TABLE tbl_scan_instance ( \
                guid        VARCHAR NOT NULL PRIMARY KEY, \
                name        VARCHAR NOT NULL, \
                seed_target VARCHAR NOT NULL, \
                created     INT DEFAULT 0, \
                started     INT DEFAULT 0, \
                ended       INT DEFAULT 0, \
                status      VARCHAR NOT NULL \
            )",
            "CREATE TABLE tbl_scan_log ( \
                scan_instance_id    VARCHAR NOT NULL REFERENCES tbl_scan_instance(guid), \
                generated           INT NOT NULL, \
                component           VARCHAR, \
                type                VARCHAR NOT NULL, \
                message             VARCHAR \
            )",
            "CREATE TABLE tbl_scan_config ( \
                scan_instance_id    VARCHAR NOT NULL REFERENCES tbl_scan_instance(guid), \
                component           VARCHAR NOT NULL, \
                opt                 VARCHAR NOT NULL, \
                val                 VARCHAR NOT NULL \
            )",
            "CREATE TABLE tbl_scan_results ( \
                scan_instance_id    VARCHAR NOT NULL REFERENCES tbl_scan_instance(guid), \
                hash                VARCHAR NOT NULL, \
                type                VARCHAR NOT NULL REFERENCES tbl_event_types(event), \
                generated           INT NOT NULL, \
                confidence          INT NOT NULL DEFAULT 100, \
                visibility          INT NOT NULL DEFAULT 100, \
                risk                INT NOT NULL DEFAULT 0, \
                module              VARCHAR NOT NULL, \
                data                VARCHAR, \
                source_event_hash  VARCHAR DEFAULT 'ROOT' \
            )",
            "CREATE INDEX idx_scan_results_id ON tbl_scan_results (scan_instance_id)",
            "CREATE INDEX idx_scan_results_type ON tbl_scan_results (scan_instance_id, type)",
            "CREATE INDEX idx_scan_results_hash ON tbl_scan_results (scan_instance_id, hash)",
            "CREATE INDEX idx_scan_results_srchash ON tbl_scan_results (scan_instance_id, source_event_hash)",
            "CREATE INDEX idx_scan_logs ON tbl_scan_log (scan_instance_id)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('AFFILIATE_INTERNET_NAME', 'Affiliate - Internet Name', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('AFFILIATE_IPADDR', 'Affiliate - IP Address', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('AFFILIATE_IP_SUBNET', 'Affiliate - IP Address - Subnet', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('AFFILIATE_WEB_CONTENT', 'Affiliate - Web Content', 1)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('BGP_AS_OWNER', 'BGP AS Ownership', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('BGP_AS_MEMBER', 'BGP AS Membership', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('BGP_AS_PEER', 'BGP AS Peer', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('BLACKLISTED_IPADDR', 'Blacklisted IP Address', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('BLACKLISTED_AFFILIATE_IPADDR', 'Blacklisted Affiliate IP Address', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('BLACKLISTED_SUBNET', 'Blacklisted IP on Same Subnet', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('BLACKLISTED_NETBLOCK', 'Blacklisted IP on Owned Netblock', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('CO_HOSTED_SITE', 'Co-Hosted Site', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('DEFACED_INTERNET_NAME', 'Defaced', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('DEFACED_IPADDR', 'Defaced IP Address', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('DEFACED_AFFILIATE_INTERNET_NAME', 'Defaced Affiliate', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('DEFACED_AFFILIATE_IPADDR', 'Defaced Affiliate IP Address', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('DEFACED_COHOST', 'Defaced Co-Hosted Site', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('DEVICE_TYPE', 'Device Type', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('DNS_TEXT', 'DNS TXT Record', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('DOMAIN_NAME', 'Domain Name', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('EMAILADDR', 'Email Address', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('GEOINFO', 'Physical Location', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('HTTP_CODE', 'HTTP Status Code', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('HUMAN_NAME', 'Human Name', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('INTERESTING_FILE', 'Interesting File', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('JUNK_FILE', 'Junk File', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('INTERNET_NAME', 'Internet Name', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('IP_ADDRESS', 'IP Address', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('IPV6_ADDRESS', 'IPv6 Address', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('NETBLOCK_OWNER', 'Netblock Ownership', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('NETBLOCK_MEMBER', 'Netblock Membership', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('MALICIOUS_ASN', 'Malicious AS', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('MALICIOUS_IPADDR', 'Malicious IP Address', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('MALICIOUS_COHOST', 'Malicious Co-Hosted Site', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('MALICIOUS_INTERNET_NAME', 'Malicious Internet Name', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('MALICIOUS_AFFILIATE_INTERNET_NAME', 'Malicious Affiliate', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('MALICIOUS_AFFILIATE_IPADDR', 'Malicious Affiliate IP Address', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('MALICIOUS_NETBLOCK', 'Owned Netblock with Malicious IP', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('MALICIOUS_SUBNET', 'Malicious IP on Same Subnet', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('LINKED_URL_INTERNAL', 'Linked URL - Internal', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('LINKED_URL_EXTERNAL', 'Linked URL - External', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('OPERATING_SYSTEM', 'Operating System', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('PASTEBIN_CONTENT', 'PasteBin Content', 1)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('PROVIDER_DNS', 'Name Server (DNS ''NS'' Records)', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('PROVIDER_MAIL', 'Email Gateway (DNS ''MX'' Records)', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('PROVIDER_JAVASCRIPT', 'Externally Hosted Javascript', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('RAW_RIR_DATA', 'Raw Data from RIRs', 1)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('RAW_DNS_RECORDS', 'Raw DNS Records', 1)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('RAW_FILE_META_DATA', 'Raw File Meta Data', 1)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('SEARCH_ENGINE_WEB_CONTENT', 'Search Engine''s Web Content', 1)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('SOCIAL_MEDIA', 'Social Media Presence', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('SIMILARDOMAIN', 'Similar Domain', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('SSL_CERTIFICATE_ISSUED', 'SSL Certificate - Issued to', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('SSL_CERTIFICATE_ISSUER', 'SSL Certificate - Issued by', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('SSL_CERTIFICATE_MISMATCH', 'SSL Certificate Host Mismatch', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('SSL_CERTIFICATE_EXPIRED', 'SSL Certificate Expired', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('SSL_CERTIFICATE_EXPIRING', 'SSL Certificate Expiring', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('SSL_CERTIFICATE_RAW', 'SSL Certificate - Raw Data', 1)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('TARGET_WEB_CONTENT', 'Web Content', 1)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('TARGET_WEB_COOKIE', 'Cookies', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('TCP_PORT_OPEN', 'Open TCP Port', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('TCP_PORT_OPEN_BANNER', 'Open TCP Port Banner', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('URL_FORM', 'URL (Form)', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('URL_FLASH', 'URL (Uses Flash)', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('URL_JAVASCRIPT', 'URL (Uses Javascript)', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('URL_WEB_FRAMEWORK', 'URL (Uses a Web Framework)', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('URL_JAVA_APPLET', 'URL (Uses Java applet)', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('URL_STATIC', 'URL (Purely Static)', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('URL_PASSWORD', 'URL (Accepts Passwords)', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('URL_UPLOAD', 'URL (Accepts Uploads)', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('WEBSERVER_BANNER', 'Web Server', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('WEBSERVER_HTTPHEADERS', 'HTTP Headers', 1)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('WEBSERVER_STRANGEHEADER', 'Non-Standard HTTP Header', 0)",
            "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('WEBSERVER_TECHNOLOGY', 'Web Technology', 0)"
    ]

    def __init__(self, opts):
        self.sf = SpiderFoot(opts)

        # connect() will create the database file if it doesn't exist, but
        # at least we can use this opportunity to ensure we have permissions to
        # read and write to such a file.
        dbh = sqlite3.connect(self.sf.myPath() + "/" + opts['__database'], timeout=10)
        if dbh is None:
            self.sf.fatal("Could not connect to internal database, and couldn't create " + \
                opts['__database'])
        dbh.text_factory = str

        self.conn = dbh
        self.dbh = dbh.cursor()

        # Now we actually check to ensure the database file has the schema set
        # up correctly.
        try:
            self.dbh.execute('SELECT COUNT(*) FROM tbl_scan_config')
            self.conn.create_function("REGEXP", 2, __dbregex__)           
        except sqlite3.Error:
            # .. If not set up, we set it up.
            try:
                self.create()
            except BaseException as e:
                self.sf.error("Tried to set up the SpiderFoot database schema, but failed: " + \
                    e.args[0])
        return
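    # A minimal construction sketch (the option dict is hypothetical; the real
    # opts mapping is assembled elsewhere in SpiderFoot and carries many more keys):
    #
    #   db = SpiderFootDb({'__database': 'spiderfoot.db'})
    #   print(db.eventTypes())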

    #
    # Back-end database operations
    #

    # Create the back-end schema
    def create(self):
        try:
            for qry in self.createQueries:
                self.dbh.execute(qry)
            self.conn.commit()
        except sqlite3.Error as e:
            raise BaseException("SQL error encountered when setting up database: " +
                e.args[0])

    # Close the database handle
    def close(self):
        self.dbh.close()

    # Search results
    # criteria is search criteria such as:
    #  - scan_id (search within a scan, if omitted search all)
    #  - type (search a specific type, if omitted search all)
    #  - value (search values for a specific string, if omitted search all)
    #  - regex (search values for a regular expression)
    # ** at least two criteria must be set **
    def search(self, criteria):
        if criteria.values().count(None) == 3:
            return False

        qvars = list()
        qry = "SELECT ROUND(c.generated) AS generated, c.data, \
            s.data as 'source_data', \
            c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \
            c.source_event_hash, t.event_descr, c.scan_instance_id \
            FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \
            WHERE s.scan_instance_id = c.scan_instance_id AND \
            t.event = c.type AND c.source_event_hash = s.hash "

        if criteria.get('scan_id') != None:
            qry += "AND c.scan_instance_id = ? "
            qvars.append(criteria['scan_id'])

        if criteria.get('type') != None:
            qry += " AND c.type = ? "
            qvars.append(criteria['type'])

        if criteria.get('value') != None:
            qry += " AND c.data LIKE ? "
            qvars.append(criteria['value'])

        if criteria.get('regex') != None:
            qry += " AND c.data REGEXP ? "
            qvars.append(criteria['regex'])

        qry = qry + " ORDER BY c.data"

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching search results: " +
                e.args[0])
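    # Example call (a sketch only; the scan ID is made up, and criteria that
    # aren't used are passed explicitly as None):
    #
    #   results = db.search({'scan_id': 'abc123', 'type': 'IP_ADDRESS',
    #                        'value': None, 'regex': None})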

    # Get event types
    def eventTypes(self):
        qry = "SELECT event_descr, event, event_raw FROM tbl_event_types"
        try:
            self.dbh.execute(qry)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when retreiving event types:" +
                e.args[0])

    # Log an event to the database
    def scanLogEvent(self, instanceId, classification, message, component=None):
        if component == None:
            component = "SpiderFoot"

        qry = "INSERT INTO tbl_scan_log \
            (scan_instance_id, generated, component, type, message) \
            VALUES (?, ?, ?, ?, ?)"
        try:
            self.dbh.execute(qry, (
                    instanceId, time.time() * 1000, component, classification, message
                ))
            self.conn.commit()
        except sqlite3.Error as e:
            if "locked" in e.args[0]:
                # TODO: Do something smarter here to handle locked databases
                self.sf.fatal("Unable to log event in DB: " + e.args[0])
            else:
                self.sf.fatal("Unable to log event in DB: " + e.args[0])

        return True

    # Store a scan instance
    def scanInstanceCreate(self, instanceId, scanName, scanTarget):
        qry = "INSERT INTO tbl_scan_instance \
            (guid, name, seed_target, created, status) \
            VALUES (?, ?, ?, ?, ?)"
        try:
            self.dbh.execute(qry, (
                    instanceId, scanName, scanTarget, time.time() * 1000, 'CREATED'
                ))
            self.conn.commit()
        except sqlite3.Error as e:
            self.sf.fatal("Unable to create instance in DB: " + e.args[0])

        return True

    # Update the start time, end time or status (or all 3) of a scan instance
    def scanInstanceSet(self, instanceId, started=None, ended=None, status=None):
        qvars = list()
        qry = "UPDATE tbl_scan_instance SET "

        if started != None:
            qry += " started = ?,"
            qvars.append(started)

        if ended != None:
            qry += " ended = ?,"
            qvars.append(ended)

        if status != None:
            qry += " status = ?,"
            qvars.append(status)

        # guid = guid is a little hack to avoid messing with , placement above
        qry += " guid = guid WHERE guid = ?"
        qvars.append(instanceId)

        try:
            self.dbh.execute(qry, qvars)
            self.conn.commit()
        except sqlite3.Error:
            self.sf.fatal("Unable to set information for the scan instance.")

    # Return info about a scan instance (name, target, created, started,
    # ended, status) - don't need this yet - untested
    def scanInstanceGet(self, instanceId):
        qry = "SELECT name, seed_target, ROUND(created/1000) AS created, \
            ROUND(started/1000) AS started, ROUND(ended/1000) AS ended, status \
            FROM tbl_scan_instance WHERE guid = ?"
        qvars = [instanceId]
        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchone()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when retreiving scan instance:" +
                e.args[0])

    # Obtain a summary of the results per event type
    def scanResultSummary(self, instanceId):
        qry = "SELECT r.type, e.event_descr, MAX(ROUND(generated)) AS last_in, \
            count(*) AS total, count(DISTINCT r.data) as utotal FROM \
            tbl_scan_results r, tbl_event_types e WHERE e.event = r.type \
            AND r.scan_instance_id = ? GROUP BY r.type ORDER BY e.event_descr"
        qvars = [instanceId]
        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching result summary: " +
                e.args[0])

    # Obtain the ROOT event for a scan: Must be same output as scanResultEvent!
    def scanRootEvent(self, instanceId):
        qry = "SELECT ROUND(c.generated) AS generated, c.data, \
            s.data as 'source_data', \
            c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \
            c.source_event_hash, t.event_descr \
            FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \
            WHERE c.scan_instance_id = ? AND c.source_event_hash = s.hash AND \
            s.scan_instance_id = c.scan_instance_id AND \
            t.event = c.type AND c.source_event_hash = 'ROOT'"

        qvars = [instanceId]

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchone()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching ROOT event: " +
                e.args[0])

    # Obtain the data for a scan and event type
    def scanResultEvent(self, instanceId, eventType='ALL'):
        qry = "SELECT ROUND(c.generated) AS generated, c.data, \
            s.data as 'source_data', \
            c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \
            c.source_event_hash, t.event_descr \
            FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \
            WHERE c.scan_instance_id = ? AND c.source_event_hash = s.hash AND \
            s.scan_instance_id = c.scan_instance_id AND \
            t.event = c.type"

        qvars = [instanceId]

        if eventType != "ALL":
            qry = qry + " AND c.type = ?"
            qvars.append(eventType)

        qry = qry + " ORDER BY c.data"

        #print "QRY: " + qry

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching result events: " +
                e.args[0])

    # Obtain a unique list of elements
    def scanResultEventUnique(self, instanceId, eventType='ALL'):
        qry = "SELECT DISTINCT data, type, COUNT(*) FROM tbl_scan_results \
            WHERE scan_instance_id = ?"
        qvars = [instanceId]

        if eventType != "ALL":
            qry = qry + " AND type = ?"
            qvars.append(eventType)

        qry = qry + " GROUP BY type, data ORDER BY COUNT(*)"

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching unique result events: " +
                e.args[0])

    # Get scan logs
    def scanLogs(self, instanceId, limit=None):
        qry = "SELECT generated AS generated, component, \
            type, message FROM tbl_scan_log WHERE scan_instance_id = ? \
            ORDER BY generated DESC"
        qvars = [instanceId]

        if limit != None:
            qry = qry + " LIMIT ?"
            qvars.append(limit)

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching scan logs: " +
                e.args[0])

    # Get scan errors
    def scanErrors(self, instanceId, limit=None):
        qry = "SELECT generated AS generated, component, \
            message FROM tbl_scan_log WHERE scan_instance_id = ? \
            AND type = 'ERROR' ORDER BY generated DESC"
        qvars = [instanceId]

        if limit != None:
            qry = qry + " LIMIT ?"
            qvars.append(limit)

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching scan errors: " +
                e.args[0])

    # Delete a scan instance
    def scanInstanceDelete(self, instanceId):
        qry1 = "DELETE FROM tbl_scan_instance WHERE guid = ?"
        qry2 = "DELETE FROM tbl_scan_config WHERE scan_instance_id = ?"
        qry3 = "DELETE FROM tbl_scan_results WHERE scan_instance_id = ?"
        qry4 = "DELETE FROM tbl_scan_log WHERE scan_instance_id = ?"
        qvars = [instanceId]
        try:
            self.dbh.execute(qry1, qvars)
            self.dbh.execute(qry2, qvars)
            self.dbh.execute(qry3, qvars)
            self.dbh.execute(qry4, qvars)
            self.conn.commit()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when deleting scan: " +
                e.args[0])

    # Store the default configuration
    def configSet(self, optMap=dict()):
        qry = "REPLACE INTO tbl_config (scope, opt, val) VALUES (?, ?, ?)"
        for opt in optMap.keys():
            # Module option
            if ":" in opt:
                parts = opt.split(':')
                qvals = [ parts[0], parts[1], optMap[opt] ]
            else:
            # Global option
                qvals = [ "GLOBAL", opt, optMap[opt] ]

            try:
                self.dbh.execute(qry, qvals)
            except sqlite3.Error as e:
                self.sf.error("SQL error encountered when storing config, aborting: " +
                    e.args[0])

            self.conn.commit()

    # Retrieve the config from the database
    def configGet(self):
        qry = "SELECT scope, opt, val FROM tbl_config"
        try:
            retval = dict()
            self.dbh.execute(qry)
            for [scope, opt, val] in self.dbh.fetchall():
                if scope == "GLOBAL":
                    retval[opt] = val
                else:
                    retval[scope + ":" + opt] = val

            return retval
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching configuration: " + e.args[0])

    # Reset the config to default (clear it from the DB and let the hard-coded
    # settings in the code take effect.)
    def configClear(self):
        qry = "DELETE from tbl_config"
        try:
            self.dbh.execute(qry)
            self.conn.commit()
        except sqlite3.Error as e:
            self.sf.error("Unable to clear configuration from the database: " + e.args[0])

    # Store a configuration value for a scan
    def scanConfigSet(self, id, optMap=dict()):
        qry = "REPLACE INTO tbl_scan_config \
                (scan_instance_id, component, opt, val) VALUES (?, ?, ?, ?)"

        for opt in optMap.keys():
            # Module option
            if ":" in opt:
                parts = opt.split(':')
                qvals = [ id, parts[0], parts[1], optMap[opt] ]
            else:
            # Global option
                qvals = [ id, "GLOBAL", opt, optMap[opt] ]

            try:
                self.dbh.execute(qry, qvals)
            except sqlite3.Error as e:
                self.sf.error("SQL error encountered when storing config, aborting: " +
                    e.args[0])

            self.conn.commit()

    # Retrieve configuration data for a scan component
    def scanConfigGet(self, instanceId):
        qry = "SELECT component, opt, val FROM tbl_scan_config \
                WHERE scan_instance_id = ? ORDER BY component, opt"
        qvars = [instanceId]
        try:
            retval = dict()
            self.dbh.execute(qry, qvars)
            for [component, opt, val] in self.dbh.fetchall():
                if component == "GLOBAL":
                    retval[opt] = val
                else:
                    retval[component + ":" + opt] = val
            return retval
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching configuration: " + e.args[0])

    # Store an event
    # eventData is a SpiderFootEvent object with the following variables:
    # - eventType: the event, e.g. URL_FORM, RAW_DATA, etc.
    # - generated: time the event occurred
    # - confidence: how sure are we of this data's validity, 0-100
    # - visibility: how 'visible' was this data, 0-100
    # - risk: how much risk does this data represent, 0-100
    # - module: module that generated the event
    # - data: the actual data, i.e. a URL, port number, webpage content, etc.
    # - sourceEventHash: hash of the event that triggered this event
    # And getHash() will return the event hash.
    def scanEventStore(self, instanceId, sfEvent, truncateSize=0):
        storeData = ''

        if type(sfEvent.data) is not unicode:
            # If sfEvent.data is a dict or list, convert it to a string first, as
            # those types do not have a unicode converter.
            if type(sfEvent.data) is str:
                storeData = unicode(sfEvent.data, 'utf-8', errors='replace')
            else:
                try:
                    storeData = unicode(str(sfEvent.data), 'utf-8', errors='replace')
                except BaseException as e:
                    self.sf.fatal("Unhandled type detected: " + str(type(sfEvent.data)))
        else:
            storeData = sfEvent.data

        if truncateSize > 0:
            storeData = storeData[0:truncateSize]

        if sfEvent.sourceEventHash in [ "", None]:
            self.sf.fatal("UNABLE TO CREATE RECORD WITH EMPTY SOURCE EVENT HASH!")

        qry = "INSERT INTO tbl_scan_results \
            (scan_instance_id, hash, type, generated, confidence, \
            visibility, risk, module, data, source_event_hash) \
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
        qvals = [ instanceId, sfEvent.getHash(), sfEvent.eventType, sfEvent.generated,
            sfEvent.confidence, sfEvent.visibility, sfEvent.risk,
            sfEvent.module, storeData, sfEvent.sourceEventHash ]

        #print "STORING: " + str(qvals)

        try:
            self.dbh.execute(qry, qvals)
            self.conn.commit()
            return None
        except sqlite3.Error as e:
            self.sf.fatal("SQL error encountered when storing event data (" + str(self.dbh) + ": " +
                e.args[0])
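    # Sketch of storing an event, mirroring how the scanner stores its ROOT
    # event later in this listing (the scan ID is made up):
    #
    #   evt = SpiderFootEvent("INITIAL_TARGET", "example.com", "SpiderFoot UI")
    #   db.scanEventStore("abc123", evt)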

    # List of all previously run scans
    def scanInstanceList(self):
        # SQLite doesn't support FULL OUTER JOINs, so we need a work-around that
        # does a UNION of scans with results and scans without results to
        # get a complete listing.
        qry = "SELECT i.guid, i.name, i.seed_target, ROUND(i.created/1000), \
            ROUND(i.started)/1000 as started, ROUND(i.ended)/1000, i.status, COUNT(r.type) \
            FROM tbl_scan_instance i, tbl_scan_results r WHERE i.guid = r.scan_instance_id \
            GROUP BY i.guid \
            UNION ALL \
            SELECT i.guid, i.name, i.seed_target, ROUND(i.created/1000), \
            ROUND(i.started)/1000 as started, ROUND(i.ended)/1000, i.status, '0' \
            FROM tbl_scan_instance i  WHERE i.guid NOT IN ( \
            SELECT distinct scan_instance_id FROM tbl_scan_results) \
            ORDER BY started DESC"
        try:
            self.dbh.execute(qry)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching scan list: " + e.args[0])

    # History of data from the scan
    def scanResultHistory(self, instanceId):
        qry = "SELECT STRFTIME('%H:%M %w', generated, 'unixepoch') AS hourmin, \
                type, COUNT(*) FROM tbl_scan_results \
                WHERE scan_instance_id = ? GROUP BY hourmin, type"
        qvars = [instanceId]
        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching scan history: " + e.args[0])


    # Get the source IDs, types and data for a set of IDs
    def scanElementSources(self, instanceId, elementIdList):
        # the output of this needs to be aligned with scanResultEvent,
        # as other functions call both expecting the same output.
        qry = "SELECT ROUND(c.generated) AS generated, c.data, \
            s.data as 'source_data', \
            c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \
            c.source_event_hash, t.event_descr \
            FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \
            WHERE c.scan_instance_id = ? AND c.source_event_hash = s.hash AND \
            s.scan_instance_id = c.scan_instance_id AND \
            t.event = c.type AND c.hash in ("
        qvars = [instanceId]

        for hashId in elementIdList:
            qry = qry + "'" + hashId + "',"
        qry = qry + "'')"

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when getting source element IDs: " + e.args[0])
Exemple #41
0
class SpiderFootScanner:
    moduleInstances = None
    status = "UNKNOWN"
    myId = None

    def __init__(self, name, target, moduleList, globalOpts, moduleOpts):
        self.config = deepcopy(globalOpts)
        self.sf = SpiderFoot(self.config)
        self.target = target
        self.moduleList = moduleList
        self.name = name

        return

    # Status of the currently running scan (if any)
    def scanStatus(self, id):
        if id != self.myId:
            return "UNKNOWN"
        return self.status  

    # Stop a scan (id variable is unnecessary for now given that only one simultaneous
    # scan is permitted.)
    def stopScan(self, id):
        if id != self.myId:
            return None

        if self.moduleInstances == None:
            return None

        for modName in self.moduleInstances.keys():
            self.moduleInstances[modName].stopScanning()

    # Start running a scan
    def startScan(self):
        self.moduleInstances = dict()
        dbh = SpiderFootDb(self.config)
        self.sf.setDbh(dbh)
        aborted = False

        # Create a unique ID for this scan and create it in the back-end DB.
        self.config['__guid__'] = dbh.scanInstanceGenGUID(self.target)
        self.sf.setScanId(self.config['__guid__'])
        self.myId = self.config['__guid__']
        dbh.scanInstanceCreate(self.config['__guid__'], self.name, self.target)
        dbh.scanInstanceSet(self.config['__guid__'], time.time() * 1000, None, 'STARTING')
        self.status = "STARTING"
        
        # Save the config current set for this scan
        self.config['_modulesenabled'] = self.moduleList
        dbh.scanConfigSet(self.config['__guid__'], self.sf.configSerialize(self.config))

        self.sf.status("Scan [" + self.config['__guid__'] + "] initiated.")
        # moduleList = list of modules the user wants to run
        try:
            # Process global options that point to other places for data

            # If a SOCKS server was specified, set it up
            if self.config['_socks1type'] != '':
                socksType = socks.PROXY_TYPE_SOCKS4
                socksDns = self.config['_socks6dns']
                socksAddr = self.config['_socks2addr']
                socksPort = int(self.config['_socks3port'])
                socksUsername = ''
                socksPassword = ''

                if self.config['_socks1type'] == '4':
                    socksType = socks.PROXY_TYPE_SOCKS4
                if self.config['_socks1type'] == '5':
                    socksType = socks.PROXY_TYPE_SOCKS5
                    socksUsername = self.config['_socks4user']
                    socksPassword = self.config['_socks5pwd']
                    
                if self.config['_socks1type'] == 'HTTP':
                    socksType = socks.PROXY_TYPE_HTTP
                   
                self.sf.debug("SOCKS: " + socksAddr + ":" + str(socksPort) + \
                    "(" + socksUsername + ":" + socksPassword + ")")
                socks.setdefaultproxy(socksType, socksAddr, socksPort, 
                    socksDns, socksUsername, socksPassword)

                # Override the default socket and getaddrinfo calls with the 
                # SOCKS ones
                socket.socket = socks.socksocket
                socket.create_connection = socks.create_connection
                socket.getaddrinfo = socks.getaddrinfo

                self.sf.updateSocket(socket)
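                # For instance, routing a scan through a local SOCKS5 proxy
                # would (hypothetically) use options along the lines of
                # _socks1type='5', _socks2addr='127.0.0.1', _socks3port='9050',
                # plus _socks4user/_socks5pwd if authentication is needed.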
            
            # Override the default DNS server
            if self.config['_dnsserver'] != "":
                res = dns.resolver.Resolver()
                res.nameservers = [ self.config['_dnsserver'] ]
                dns.resolver.override_system_resolver(res)
            else:
                dns.resolver.restore_system_resolver()

            # Set the user agent
            self.config['_useragent'] = self.sf.optValueToData(self.config['_useragent'])

            # Get internet TLDs
            tlddata = self.sf.cacheGet("internet_tlds", self.config['_internettlds_cache'])
            # If it wasn't loadable from cache, load it from scratch
            if tlddata == None:
                self.config['_internettlds'] = self.sf.optValueToData(self.config['_internettlds'])
                self.sf.cachePut("internet_tlds", self.config['_internettlds'])
            else:
                self.config["_internettlds"] = tlddata.splitlines()

            for modName in self.moduleList:
                if modName == '':
                    continue

                module = __import__('modules.' + modName, globals(), locals(), [modName])
                mod = getattr(module, modName)()
                mod.__name__ = modName

                # A bit hacky: we pass the database object as part of the config. This
                # object should only be used by the internal SpiderFoot modules writing
                # to the database, which at present is only sfp__stor_db.
                # Individual modules cannot create their own SpiderFootDb instance or
                # we'll get database locking issues, so it all goes through this.
                self.config['__sfdb__'] = dbh

                # Set up the module
                # Configuration is a combined global config with module-specific options
                #modConfig = deepcopy(self.config)
                modConfig = self.config['__modules__'][modName]['opts']
                for opt in self.config.keys():
                    modConfig[opt] = self.config[opt]

                mod.clearListeners() # clear any listener relationships from the past
                mod.setup(self.sf, self.target, modConfig)
                self.moduleInstances[modName] = mod

                # Override the module's local socket module
                # to be the SOCKS one.
                if self.config['_socks1type'] != '':
                    mod._updateSocket(socket)

                self.sf.status(modName + " module loaded.")

            # Register listener modules and then start all modules sequentially
            for module in self.moduleInstances.values():
                for listenerModule in self.moduleInstances.values():
                    # Careful not to register twice or you will get duplicate events
                    if listenerModule in module._listenerModules:
                        continue
                    # Note the absence of a check for whether a module can register
                    # to itself. That is intentional because some modules will
                    # act on their own notifications (e.g. sfp_dns)!
                    if listenerModule.watchedEvents() != None:
                        module.registerListener(listenerModule)

            dbh.scanInstanceSet(self.config['__guid__'], status='RUNNING')
            self.status = "RUNNING"

            # Create the "ROOT" event which un-triggered modules will link events to
            rootEvent = SpiderFootEvent("INITIAL_TARGET", self.target, "SpiderFoot UI")
            dbh.scanEventStore(self.config['__guid__'], rootEvent)

            # Start the modules sequentially.
            for module in self.moduleInstances.values():
                # Check in case the user requested to stop the scan between modules initializing
                if module.checkForStop():
                    dbh.scanInstanceSet(self.config['__guid__'], status='ABORTING')
                    self.status = "ABORTING"
                    aborted = True
                    break
                # Many modules' start() method will return None, as most will rely on 
                # notifications during the scan from other modules.
                module.start()

            # Check if any of the modules ended due to being stopped
            for module in self.moduleInstances.values():
                if module.checkForStop():
                    aborted = True

            if aborted:
                self.sf.status("Scan [" + self.config['__guid__'] + "] aborted.")
                dbh.scanInstanceSet(self.config['__guid__'], None, time.time() * 1000, 'ABORTED')
                self.status = "ABORTED"
            else:
                self.sf.status("Scan [" + self.config['__guid__'] + "] completed.")
                dbh.scanInstanceSet(self.config['__guid__'], None, time.time() * 1000, 'FINISHED')
                self.status = "FINISHED"
        except BaseException as e:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self.sf.error("Unhandled exception (" + e.__class__.__name__ + ") " + \
                "encountered during scan. Please report this as a bug: " + \
                repr(traceback.format_exception(exc_type, exc_value, exc_traceback)), False)
            self.sf.status("Scan [" + self.config['__guid__'] + "] failed: " + str(e))
            dbh.scanInstanceSet(self.config['__guid__'], None, time.time() * 1000, 'ERROR-FAILED')
            self.status = "ERROR-FAILED"

        self.moduleInstances = None
        dbh.close()
        self.sf.setDbh(None)
        self.sf.setScanId(None)
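    # Rough usage sketch (the name, target, module list and config dicts are
    # placeholders; real values come from the SpiderFoot UI or CLI):
    #
    #   scanner = SpiderFootScanner("My scan", "example.com", ["sfp_dns"],
    #                               sfConfig, dict())
    #   scanner.startScan()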
    def test_target_type(self):
        """
        Test targetType(self, target)
        """
        sf = SpiderFoot(dict())

        target_type = sf.targetType("0.0.0.0")
        self.assertEqual('IP_ADDRESS', target_type)
        target_type = sf.targetType("*****@*****.**")
        self.assertEqual('EMAILADDR', target_type)
        target_type = sf.targetType("0.0.0.0/0")
        self.assertEqual('NETBLOCK_OWNER', target_type)
        target_type = sf.targetType("+1234567890")
        self.assertEqual('PHONE_NUMBER', target_type)
        target_type = sf.targetType('"Human Name"')
        self.assertEqual('HUMAN_NAME', target_type)
        target_type = sf.targetType('"abc123"')
        self.assertEqual('USERNAME', target_type)
        target_type = sf.targetType("1234567890")
        self.assertEqual('BGP_AS_OWNER', target_type)
        target_type = sf.targetType("::1")
        self.assertEqual('IPV6_ADDRESS', target_type)
        target_type = sf.targetType("spiderfoot.net")
        self.assertEqual('INTERNET_NAME', target_type)
    def setup_server():
        default_config = {
            '_debug': False,  # Debug
            '__logging': True,  # Logging in general
            '__outputfilter': None,  # Event types to filter from modules' output
            '_useragent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:62.0) Gecko/20100101 Firefox/62.0',  # User-Agent to use for HTTP requests
            '_dnsserver': '',  # Override the default resolver
            '_fetchtimeout': 5,  # number of seconds before giving up on a fetch
            '_internettlds': 'https://publicsuffix.org/list/effective_tld_names.dat',
            '_internettlds_cache': 72,
            '_genericusers': "abuse,admin,billing,compliance,devnull,dns,ftp,hostmaster,inoc,ispfeedback,ispsupport,list-request,list,maildaemon,marketing,noc,no-reply,noreply,null,peering,peering-notify,peering-request,phish,phishing,postmaster,privacy,registrar,registry,root,routing-registry,rr,sales,security,spam,support,sysadmin,tech,undisclosed-recipients,unsubscribe,usenet,uucp,webmaster,www",
            '__database': 'spiderfoot.test.db',  # note: test database file
            '__modules__': None,  # List of modules. Will be set after start-up.
            '_socks1type': '',
            '_socks2addr': '',
            '_socks3port': '',
            '_socks4user': '',
            '_socks5pwd': '',
            '_torctlport': 9051,
            '__logstdout': False
        }

        default_web_config = {'root': '/'}

        sfModules = dict()
        sf = SpiderFoot(default_config)
        mod_dir = sf.myPath() + '/modules/'
        for filename in os.listdir(mod_dir):
            if not filename.startswith("sfp_"):
                continue
            if not filename.endswith(".py"):
                continue
            # Skip the module template and debugging modules
            if filename in ('sfp_template.py', 'sfp_stor_print.py'):
                continue
            modName = filename.split('.')[0]

            # Load and instantiate the module
            sfModules[modName] = dict()
            mod = __import__('modules.' + modName, globals(), locals(),
                             [modName])
            sfModules[modName]['object'] = getattr(mod, modName)()
            sfModules[modName]['name'] = sfModules[modName]['object'].meta['name']
            sfModules[modName]['cats'] = sfModules[modName]['object'].meta.get('categories', list())
            sfModules[modName]['group'] = sfModules[modName]['object'].meta.get('useCases', list())
            if len(sfModules[modName]['cats']) > 1:
                raise ValueError(f"Module {modName} has multiple categories defined but only one is supported.")
            sfModules[modName]['labels'] = sfModules[modName]['object'].meta.get('flags', list())
            sfModules[modName]['descr'] = sfModules[modName]['object'].meta['summary']
            sfModules[modName]['provides'] = sfModules[modName]['object'].producedEvents()
            sfModules[modName]['consumes'] = sfModules[modName]['object'].watchedEvents()
            sfModules[modName]['meta'] = sfModules[modName]['object'].meta
            if hasattr(sfModules[modName]['object'], 'opts'):
                sfModules[modName]['opts'] = sfModules[modName]['object'].opts
            if hasattr(sfModules[modName]['object'], 'optdescs'):
                sfModules[modName]['optdescs'] = sfModules[modName]['object'].optdescs

        default_config['__modules__'] = sfModules

        conf = {
            '/query': {
                'tools.encode.text_only': False,
                'tools.encode.add_charset': True,
            },
            '/static': {
                'tools.staticdir.on': True,
                'tools.staticdir.dir': 'static',
                'tools.staticdir.root': sf.myPath()
            }
        }

        cherrypy.tree.mount(SpiderFootWebUi(default_web_config,
                                            default_config),
                            script_name=default_web_config.get('root'),
                            config=conf)
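        # After mounting, a test harness (or deployment) would typically start
        # the CherryPy engine -- a sketch, not part of this snippet:
        #
        #   cherrypy.engine.start()
        #   cherrypy.engine.block()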
Exemple #44
0
class SpiderFootDb:
    sf = None
    dbh = None
    conn = None

    # Queries for creating the SpiderFoot database
    createQueries = [
        "PRAGMA journal_mode=WAL",
        "CREATE TABLE tbl_event_types ( \
            event       VARCHAR NOT NULL PRIMARY KEY, \
            event_descr VARCHAR NOT NULL, \
            event_raw   INT NOT NULL DEFAULT 0, \
            event_type  VARCHAR NOT NULL \
        )",
        "CREATE TABLE tbl_config ( \
            scope   VARCHAR NOT NULL, \
            opt     VARCHAR NOT NULL, \
            val     VARCHAR NOT NULL, \
            PRIMARY KEY (scope, opt) \
        )",
        "CREATE TABLE tbl_scan_instance ( \
            guid        VARCHAR NOT NULL PRIMARY KEY, \
            name        VARCHAR NOT NULL, \
            seed_target VARCHAR NOT NULL, \
            created     INT DEFAULT 0, \
            started     INT DEFAULT 0, \
            ended       INT DEFAULT 0, \
            status      VARCHAR NOT NULL \
        )",
        "CREATE TABLE tbl_scan_log ( \
            scan_instance_id    VARCHAR NOT NULL REFERENCES tbl_scan_instance(guid), \
            generated           INT NOT NULL, \
            component           VARCHAR, \
            type                VARCHAR NOT NULL, \
            message             VARCHAR \
        )",
        "CREATE TABLE tbl_scan_config ( \
            scan_instance_id    VARCHAR NOT NULL REFERENCES tbl_scan_instance(guid), \
            component           VARCHAR NOT NULL, \
            opt                 VARCHAR NOT NULL, \
            val                 VARCHAR NOT NULL \
        )",
        "CREATE TABLE tbl_scan_results ( \
            scan_instance_id    VARCHAR NOT NULL REFERENCES tbl_scan_instance(guid), \
            hash                VARCHAR NOT NULL, \
            type                VARCHAR NOT NULL REFERENCES tbl_event_types(event), \
            generated           INT NOT NULL, \
            confidence          INT NOT NULL DEFAULT 100, \
            visibility          INT NOT NULL DEFAULT 100, \
            risk                INT NOT NULL DEFAULT 0, \
            module              VARCHAR NOT NULL, \
            data                VARCHAR, \
            false_positive      INT NOT NULL DEFAULT 0, \
            source_event_hash  VARCHAR DEFAULT 'ROOT' \
        )",
        "CREATE INDEX idx_scan_results_id ON tbl_scan_results (scan_instance_id)",
        "CREATE INDEX idx_scan_results_type ON tbl_scan_results (scan_instance_id, type)",
        "CREATE INDEX idx_scan_results_hash ON tbl_scan_results (scan_instance_id, hash)",
        "CREATE INDEX idx_scan_results_srchash ON tbl_scan_results (scan_instance_id, source_event_hash)",
        "CREATE INDEX idx_scan_logs ON tbl_scan_log (scan_instance_id)",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('ROOT', 'Internal SpiderFoot Root event', 1, 'INTERNAL')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('ACCOUNT_EXTERNAL_OWNED', 'Account on External Site', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('ACCOUNT_EXTERNAL_OWNED_COMPROMISED', 'Hacked Account on External Site', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('ACCOUNT_EXTERNAL_USER_SHARED', 'User Account on External Site', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('ACCOUNT_EXTERNAL_USER_SHARED_COMPROMISED', 'Hacked User Account on External Site', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AFFILIATE_INTERNET_NAME', 'Affiliate - Internet Name', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AFFILIATE_IPADDR', 'Affiliate - IP Address', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AFFILIATE_WEB_CONTENT', 'Affiliate - Web Content', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AFFILIATE_DESCRIPTION_CATEGORY', 'Affiliate Description - Category', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AFFILIATE_DESCRIPTION_ABSTRACT', 'Affiliate Description - Abstract', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('APPSTORE_ENTRY', 'App Store Entry', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AMAZON_S3_BUCKET', 'Amazon S3 Bucket', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BASE64_DATA', 'Base64-encoded Data', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BGP_AS_OWNER', 'BGP AS Ownership', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BGP_AS_MEMBER', 'BGP AS Membership', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BGP_AS_PEER', 'BGP AS Peer', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BLACKLISTED_IPADDR', 'Blacklisted IP Address', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BLACKLISTED_AFFILIATE_IPADDR', 'Blacklisted Affiliate IP Address', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BLACKLISTED_SUBNET', 'Blacklisted IP on Same Subnet', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BLACKLISTED_NETBLOCK', 'Blacklisted IP on Owned Netblock', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('CO_HOSTED_SITE', 'Co-Hosted Site', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DARKNET_MENTION_URL', 'Darknet Mention URL', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DARKNET_MENTION_CONTENT', 'Darknet Mention Web Content', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DEFACED_INTERNET_NAME', 'Defaced', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DEFACED_IPADDR', 'Defaced IP Address', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DEFACED_AFFILIATE_INTERNET_NAME', 'Defaced Affiliate', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DEFACED_COHOST', 'Defaced Co-Hosted Site', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DEFACED_AFFILIATE_IPADDR', 'Defaced Affiliate IP Address', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DESCRIPTION_CATEGORY', 'Description - Category', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DESCRIPTION_ABSTRACT', 'Description - Abstract', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DEVICE_TYPE', 'Device Type', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DNS_TEXT', 'DNS TXT Record', 0, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DOMAIN_NAME', 'Domain Name', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DOMAIN_NAME_PARENT', 'Domain Name (Parent)', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DOMAIN_REGISTRAR', 'Domain Registrar', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DOMAIN_WHOIS', 'Domain Whois', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('EMAILADDR', 'Email Address', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('EMAILADDR_COMPROMISED', 'Hacked Email Address', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('ERROR_MESSAGE', 'Error Message', 0, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('GEOINFO', 'Physical Location', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('HTTP_CODE', 'HTTP Status Code', 0, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('HUMAN_NAME', 'Human Name', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('INTERESTING_FILE', 'Interesting File', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('INTERESTING_FILE_HISTORIC', 'Historic Interesting File', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('JUNK_FILE', 'Junk File', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('INTERNET_NAME', 'Internet Name', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('IP_ADDRESS', 'IP Address', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('IPV6_ADDRESS', 'IPv6 Address', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('LINKED_URL_INTERNAL', 'Linked URL - Internal', 0, 'SUBENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('LINKED_URL_EXTERNAL', 'Linked URL - External', 0, 'SUBENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_ASN', 'Malicious AS', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_IPADDR', 'Malicious IP Address', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_COHOST', 'Malicious Co-Hosted Site', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_EMAILADDR', 'Malicious E-mail Address', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_INTERNET_NAME', 'Malicious Internet Name', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_AFFILIATE_INTERNET_NAME', 'Malicious Affiliate', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_AFFILIATE_IPADDR', 'Malicious Affiliate IP Address', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_NETBLOCK', 'Malicious IP on Owned Netblock', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_SUBNET', 'Malicious IP on Same Subnet', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('NETBLOCK_OWNER', 'Netblock Ownership', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('NETBLOCK_MEMBER', 'Netblock Membership', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('NETBLOCK_WHOIS', 'Netblock Whois', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('OPERATING_SYSTEM', 'Operating System', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('LEAKSITE_URL', 'Leak Site URL', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('LEAKSITE_CONTENT', 'Leak Site Content', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PHONE_NUMBER', 'Phone Number', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PHYSICAL_ADDRESS', 'Physical Address', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PHYSICAL_COORDINATES', 'Physical Coordinates', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PGP_KEY', 'PGP Public Key', 0, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PROVIDER_DNS', 'Name Server (DNS ''NS'' Records)', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PROVIDER_JAVASCRIPT', 'Externally Hosted Javascript', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PROVIDER_MAIL', 'Email Gateway (DNS ''MX'' Records)', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PROVIDER_HOSTING', 'Hosting Provider', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PUBLIC_CODE_REPO', 'Public Code Repository', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('RAW_RIR_DATA', 'Raw Data from RIRs', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('RAW_DNS_RECORDS', 'Raw DNS Records', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('RAW_FILE_META_DATA', 'Raw File Meta Data', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SEARCH_ENGINE_WEB_CONTENT', 'Search Engine''s Web Content', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SOCIAL_MEDIA', 'Social Media Presence', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SIMILARDOMAIN', 'Similar Domain', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SOFTWARE_USED', 'Software Used', 0, 'SUBENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_RAW', 'SSL Certificate - Raw Data', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_ISSUED', 'SSL Certificate - Issued to', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_ISSUER', 'SSL Certificate - Issued by', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_MISMATCH', 'SSL Certificate Host Mismatch', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_EXPIRED', 'SSL Certificate Expired', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_EXPIRING', 'SSL Certificate Expiring', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('TARGET_WEB_CONTENT', 'Web Content', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('TARGET_WEB_COOKIE', 'Cookies', 0, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('TCP_PORT_OPEN', 'Open TCP Port', 0, 'SUBENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('TCP_PORT_OPEN_BANNER', 'Open TCP Port Banner', 0, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('UDP_PORT_OPEN', 'Open UDP Port', 0, 'SUBENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('UDP_PORT_OPEN_INFO', 'Open UDP Port Information', 0, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_ADBLOCKED_EXTERNAL', 'URL (AdBlocked External)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_ADBLOCKED_INTERNAL', 'URL (AdBlocked Internal)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_FORM', 'URL (Form)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_FLASH', 'URL (Uses Flash)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_JAVASCRIPT', 'URL (Uses Javascript)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_WEB_FRAMEWORK', 'URL (Uses a Web Framework)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_JAVA_APPLET', 'URL (Uses Java Applet)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_STATIC', 'URL (Purely Static)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_PASSWORD', 'URL (Accepts Passwords)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_UPLOAD', 'URL (Accepts Uploads)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_FORM_HISTORIC', 'Historic URL (Form)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_FLASH_HISTORIC', 'Historic URL (Uses Flash)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_JAVASCRIPT_HISTORIC', 'Historic URL (Uses Javascript)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_WEB_FRAMEWORK_HISTORIC', 'Historic URL (Uses a Web Framework)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_JAVA_APPLET_HISTORIC', 'Historic URL (Uses Java Applet)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_STATIC_HISTORIC', 'Historic URL (Purely Static)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_PASSWORD_HISTORIC', 'Historic URL (Accepts Passwords)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_UPLOAD_HISTORIC', 'Historic URL (Accepts Uploads)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('USERNAME', 'Username', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('VULNERABILITY', 'Vulnerability in Public Domain', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('WEBSERVER_BANNER', 'Web Server', 0, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('WEBSERVER_HTTPHEADERS', 'HTTP Headers', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('WEBSERVER_STRANGEHEADER', 'Non-Standard HTTP Header', 0, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('WEBSERVER_TECHNOLOGY', 'Web Technology', 0, 'DESCRIPTOR')"
    ]

    def __init__(self, opts):
        self.sf = SpiderFoot(opts)

        # connect() will create the database file if it doesn't exist, but
        # at least we can use this opportunity to ensure we have permissions to
        # read and write to such a file.
        dbh = sqlite3.connect(self.sf.myPath() + "/" + opts['__database'], timeout=10)
        if dbh is None:
            self.sf.fatal("Could not connect to internal database, and couldn't create " + opts['__database'])
        dbh.text_factory = str

        self.conn = dbh
        self.dbh = dbh.cursor()

        # Now we actually check to ensure the database file has the schema set
        # up correctly.
        try:
            self.dbh.execute('SELECT COUNT(*) FROM tbl_scan_config')
            self.conn.create_function("REGEXP", 2, __dbregex__)
        except sqlite3.Error:
            # .. If not set up, we set it up.
            try:
                self.create()
            except BaseException as e:
                self.sf.error("Tried to set up the SpiderFoot database schema, but failed: " + e.args[0])
        return

    #
    # Back-end database operations
    #

    # Create the back-end schema
    def create(self):
        try:
            for qry in self.createQueries:
                self.dbh.execute(qry)
            self.conn.commit()
        except sqlite3.Error as e:
            raise BaseException("SQL error encountered when setting up database: " + e.args[0])

    # Close the database handle
    def close(self):
        self.dbh.close()

    # Search results
    # criteria is search criteria such as:
    #  - scan_id (search within a scan, if omitted search all)
    #  - type (search a specific type, if omitted search all)
    #  - value (search values for a specific string, if omitted search all)
    #  - regex (search values for a regular expression)
    # ** at least two criteria must be set **
    def search(self, criteria, filterFp=False):
        if criteria.values().count(None) == 3:
            return False

        qvars = list()
        qry = "SELECT ROUND(c.generated) AS generated, c.data, \
            s.data as 'source_data', \
            c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \
            c.source_event_hash, t.event_descr, t.event_type, c.scan_instance_id, \
            c.false_positive as 'fp', s.false_positive as 'parent_fp' \
            FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \
            WHERE s.scan_instance_id = c.scan_instance_id AND \
            t.event = c.type AND c.source_event_hash = s.hash "

        if filterFp:
            qry += " AND c.false_positive <> 1 "

        if criteria.get('scan_id') is not None:
            qry += "AND c.scan_instance_id = ? "
            qvars.append(criteria['scan_id'])

        if criteria.get('type') is not None:
            qry += " AND c.type = ? "
            qvars.append(criteria['type'])

        if criteria.get('value') is not None:
            qry += " AND (c.data LIKE ? OR s.data LIKE ?) "
            qvars.append(criteria['value'])
            qvars.append(criteria['value'])

        if criteria.get('regex') is not None:
            qry += " AND (c.data REGEXP ? OR s.data REGEXP ?) "
            qvars.append(criteria['regex'])
            qvars.append(criteria['regex'])

        qry += " ORDER BY c.data"

        try:
            #print qry
            #print str(qvars)
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching search results: " + e.args[0])

    # Get event types
    def eventTypes(self):
        qry = "SELECT event_descr, event, event_raw, event_type FROM tbl_event_types"
        try:
            self.dbh.execute(qry)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when retreiving event types:" + e.args[0])

    # Log an event to the database
    def scanLogEvent(self, instanceId, classification, message, component=None):
        if component is None:
            component = "SpiderFoot"

        qry = "INSERT INTO tbl_scan_log \
            (scan_instance_id, generated, component, type, message) \
            VALUES (?, ?, ?, ?, ?)"
        try:
            self.dbh.execute(qry, (
                instanceId, time.time() * 1000, component, classification, message
            ))
            self.conn.commit()
        except sqlite3.Error as e:
            if "locked" in e.args[0]:
                # TODO: Do something smarter here to handle locked databases
                self.sf.fatal("Unable to log event in DB due to lock: " + e.args[0])
            else:
                self.sf.fatal("Unable to log event in DB: " + e.args[0])

        return True

    # Store a scan instance
    def scanInstanceCreate(self, instanceId, scanName, scanTarget):
        qry = "INSERT INTO tbl_scan_instance \
            (guid, name, seed_target, created, status) \
            VALUES (?, ?, ?, ?, ?)"
        try:
            self.dbh.execute(qry, (
                instanceId, scanName, scanTarget, time.time() * 1000, 'CREATED'
            ))
            self.conn.commit()
        except sqlite3.Error as e:
            self.sf.fatal("Unable to create instance in DB: " + e.args[0])

        return True

    # Update the start time, end time or status (or all 3) of a scan instance
    def scanInstanceSet(self, instanceId, started=None, ended=None, status=None):
        qvars = list()
        qry = "UPDATE tbl_scan_instance SET "

        if started is not None:
            qry += " started = ?,"
            qvars.append(started)

        if ended is not None:
            qry += " ended = ?,"
            qvars.append(ended)

        if status is not None:
            qry += " status = ?,"
            qvars.append(status)

        # guid = guid is a little hack to avoid messing with comma placement above
        qry += " guid = guid WHERE guid = ?"
        qvars.append(instanceId)

        try:
            self.dbh.execute(qry, qvars)
            self.conn.commit()
        except sqlite3.Error:
            self.sf.fatal("Unable to set information for the scan instance.")

    # Return info about a scan instance (name, target, created, started,
    # ended, status)
    def scanInstanceGet(self, instanceId):
        qry = "SELECT name, seed_target, ROUND(created/1000) AS created, \
            ROUND(started/1000) AS started, ROUND(ended/1000) AS ended, status \
            FROM tbl_scan_instance WHERE guid = ?"
        qvars = [instanceId]
        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchone()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when retreiving scan instance:" + e.args[0])

    # Obtain a summary of the results per event type
    def scanResultSummary(self, instanceId, by="type"):
        if by == "type":
            qry = "SELECT r.type, e.event_descr, MAX(ROUND(generated)) AS last_in, \
                count(*) AS total, count(DISTINCT r.data) as utotal FROM \
                tbl_scan_results r, tbl_event_types e WHERE e.event = r.type \
                AND r.scan_instance_id = ? GROUP BY r.type ORDER BY e.event_descr"

        if by == "module":
            qry = "SELECT r.module, '', MAX(ROUND(generated)) AS last_in, \
                count(*) AS total, count(DISTINCT r.data) as utotal FROM \
                tbl_scan_results r, tbl_event_types e WHERE e.event = r.type \
                AND r.scan_instance_id = ? GROUP BY r.module ORDER BY r.module DESC"

        if by == "entity":
            qry = "SELECT r.data, e.event_descr, MAX(ROUND(generated)) AS last_in, \
                count(*) AS total, count(DISTINCT r.data) as utotal FROM \
                tbl_scan_results r, tbl_event_types e WHERE e.event = r.type \
                AND r.scan_instance_id = ? \
                AND e.event_type in ('ENTITY') \
                GROUP BY r.data, e.event_descr ORDER BY total DESC limit 50"

        qvars = [instanceId]
        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching result summary: " + e.args[0])

    # Obtain the data for a scan and event type
    def scanResultEvent(self, instanceId, eventType='ALL', filterFp=False):
        qry = "SELECT ROUND(c.generated) AS generated, c.data, \
            s.data as 'source_data', \
            c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \
            c.source_event_hash, t.event_descr, t.event_type, s.scan_instance_id, \
            c.false_positive as 'fp', s.false_positive as 'parent_fp' \
            FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \
            WHERE c.scan_instance_id = ? AND c.source_event_hash = s.hash AND \
            s.scan_instance_id = c.scan_instance_id AND \
            t.event = c.type"

        qvars = [instanceId]

        if eventType != "ALL":
            qry += " AND c.type = ?"
            qvars.append(eventType)

        if filterFp:
            qry += " AND c.false_positive <> 1"

        qry += " ORDER BY c.data"

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching result events: " + e.args[0])

    # Obtain a unique list of elements
    def scanResultEventUnique(self, instanceId, eventType='ALL', filterFp=False):
        qry = "SELECT DISTINCT data, type, COUNT(*) FROM tbl_scan_results \
            WHERE scan_instance_id = ?"
        qvars = [instanceId]

        if eventType != "ALL":
            qry += " AND type = ?"
            qvars.append(eventType)

        if filterFp:
            qry += " AND false_positive <> 1"

        qry += " GROUP BY type, data ORDER BY COUNT(*)"

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching unique result events: " + e.args[0])

    # Get scan logs
    def scanLogs(self, instanceId, limit=None):
        qry = "SELECT generated AS generated, component, \
            type, message FROM tbl_scan_log WHERE scan_instance_id = ? \
            ORDER BY generated DESC"
        qvars = [instanceId]

        if limit is not None:
            qry += " LIMIT ?"
            qvars.append(limit)

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching scan logs: " + e.args[0])

    # Get scan errors
    def scanErrors(self, instanceId, limit=None):
        qry = "SELECT generated AS generated, component, \
            message FROM tbl_scan_log WHERE scan_instance_id = ? \
            AND type = 'ERROR' ORDER BY generated DESC"
        qvars = [instanceId]

        if limit is not None:
            qry += " LIMIT ?"
            qvars.append(limit)

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching scan errors: " + e.args[0])

    # Delete a scan instance
    def scanInstanceDelete(self, instanceId):
        qry1 = "DELETE FROM tbl_scan_instance WHERE guid = ?"
        qry2 = "DELETE FROM tbl_scan_config WHERE scan_instance_id = ?"
        qry3 = "DELETE FROM tbl_scan_results WHERE scan_instance_id = ?"
        qry4 = "DELETE FROM tbl_scan_log WHERE scan_instance_id = ?"
        qvars = [instanceId]
        try:
            self.dbh.execute(qry1, qvars)
            self.dbh.execute(qry2, qvars)
            self.dbh.execute(qry3, qvars)
            self.dbh.execute(qry4, qvars)
            self.conn.commit()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when deleting scan: " + e.args[0])

    # Set the false positive flag for a result
    def scanResultsUpdateFP(self, instanceId, resultHashes, fpFlag):
        for resultHash in resultHashes:
            qry = "UPDATE tbl_scan_results SET false_positive = ? WHERE \
                scan_instance_id = ? AND hash = ?"
            qvars = [fpFlag, instanceId, resultHash]
            try:
                self.dbh.execute(qry, qvars)
            except sqlite3.Error as e:
                self.sf.error("SQL error encountered when updating F/P: " + e.args[0], False)
                return False

        self.conn.commit()
        return True
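
    # Illustrative call (the hashes are placeholders): mark two results as
    # false positives for a scan.
    #
    #   dbh.scanResultsUpdateFP("myscan-guid", ["hash-one", "hash-two"], 1)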

    # Store the default configuration
    def configSet(self, optMap=dict()):
        qry = "REPLACE INTO tbl_config (scope, opt, val) VALUES (?, ?, ?)"
        for opt in optMap.keys():
            # Module option
            if ":" in opt:
                parts = opt.split(':')
                qvals = [parts[0], parts[1], optMap[opt]]
            else:
                # Global option
                qvals = ["GLOBAL", opt, optMap[opt]]

            try:
                self.dbh.execute(qry, qvals)
            except sqlite3.Error as e:
                self.sf.error("SQL error encountered when storing config, aborting: " + e.args[0])

            self.conn.commit()
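
    # Example of the expected key format (illustrative values, not from the
    # original source): global options use the bare option name, module
    # options use 'module:option'.
    #
    #   dbh.configSet({'_fetchtimeout': '10',
    #                  'sfp_dnsresolve:validatereverse': 'True'})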

    # Retrieve the config from the database
    def configGet(self):
        qry = "SELECT scope, opt, val FROM tbl_config"
        try:
            retval = dict()
            self.dbh.execute(qry)
            for [scope, opt, val] in self.dbh.fetchall():
                if scope == "GLOBAL":
                    retval[opt] = val
                else:
                    retval[scope + ":" + opt] = val

            return retval
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching configuration: " + e.args[0])

    # Reset the config to default (clear it from the DB and let the hard-coded
    # settings in the code take effect.)
    def configClear(self):
        qry = "DELETE from tbl_config"
        try:
            self.dbh.execute(qry)
            self.conn.commit()
        except sqlite3.Error as e:
            self.sf.error("Unable to clear configuration from the database: " + e.args[0])

    # Store a configuration value for a scan
    def scanConfigSet(self, id, optMap=dict()):
        qry = "REPLACE INTO tbl_scan_config \
                (scan_instance_id, component, opt, val) VALUES (?, ?, ?, ?)"

        for opt in optMap.keys():
            # Module option
            if ":" in opt:
                parts = opt.split(':')
                qvals = [id, parts[0], parts[1], optMap[opt]]
            else:
                # Global option
                qvals = [id, "GLOBAL", opt, optMap[opt]]

            try:
                self.dbh.execute(qry, qvals)
            except sqlite3.Error as e:
                self.sf.error("SQL error encountered when storing config, aborting: " + e.args[0])

            self.conn.commit()

    # Retrieve configuration data for a scan component
    def scanConfigGet(self, instanceId):
        qry = "SELECT component, opt, val FROM tbl_scan_config \
                WHERE scan_instance_id = ? ORDER BY component, opt"
        qvars = [instanceId]
        try:
            retval = dict()
            self.dbh.execute(qry, qvars)
            for [component, opt, val] in self.dbh.fetchall():
                if component == "GLOBAL":
                    retval[opt] = val
                else:
                    retval[component + ":" + opt] = val
            return retval
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching configuration: " + e.args[0])

    # Store an event
    # eventData is a SpiderFootEvent object with the following variables:
    # - eventType: the event, e.g. URL_FORM, RAW_DATA, etc.
    # - generated: time the event occurred
    # - confidence: how sure are we of this data's validity, 0-100
    # - visibility: how 'visible' was this data, 0-100
    # - risk: how much risk does this data represent, 0-100
    # - module: module that generated the event
    # - data: the actual data, i.e. a URL, port number, webpage content, etc.
    # - sourceEventHash: hash of the event that triggered this event
    # And getHash() will return the event hash.
    def scanEventStore(self, instanceId, sfEvent, truncateSize=0):
        storeData = ''

        if type(sfEvent.data) is not unicode:
            # If sfEvent.data is a dict or list, convert it to a string first, as
            # those types do not have a unicode converter.
            if type(sfEvent.data) is str:
                storeData = unicode(sfEvent.data, 'utf-8', errors='replace')
            else:
                try:
                    storeData = unicode(str(sfEvent.data), 'utf-8', errors='replace')
                except BaseException as e:
                    self.sf.fatal("Unhandled type detected: " + str(type(sfEvent.data)))
        else:
            storeData = sfEvent.data

        if truncateSize > 0:
            storeData = storeData[0:truncateSize]

        if sfEvent.sourceEventHash in ["", None]:
            self.sf.fatal("UNABLE TO CREATE RECORD WITH EMPTY SOURCE EVENT HASH!")

        qry = "INSERT INTO tbl_scan_results \
            (scan_instance_id, hash, type, generated, confidence, \
            visibility, risk, module, data, source_event_hash) \
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
        qvals = [instanceId, sfEvent.getHash(), sfEvent.eventType, sfEvent.generated,
                 sfEvent.confidence, sfEvent.visibility, sfEvent.risk,
                 sfEvent.module, storeData, sfEvent.sourceEventHash]

        #print "STORING: " + str(qvals)

        try:
            self.dbh.execute(qry, qvals)
            self.conn.commit()
            return None
        except sqlite3.Error as e:
            self.sf.fatal("SQL error encountered when storing event data (" + str(self.dbh) + ": " + e.args[0])

    # List of all previously run scans
    def scanInstanceList(self):
        # SQLite doesn't support FULL OUTER JOINs, so we need a work-around
        # that does a UNION of scans with results and scans without results
        # to get a complete listing.
        qry = "SELECT i.guid, i.name, i.seed_target, ROUND(i.created/1000), \
            ROUND(i.started)/1000 as started, ROUND(i.ended)/1000, i.status, COUNT(r.type) \
            FROM tbl_scan_instance i, tbl_scan_results r WHERE i.guid = r.scan_instance_id \
            AND r.type <> 'ROOT' GROUP BY i.guid \
            UNION ALL \
            SELECT i.guid, i.name, i.seed_target, ROUND(i.created/1000), \
            ROUND(i.started)/1000 as started, ROUND(i.ended)/1000, i.status, '0' \
            FROM tbl_scan_instance i  WHERE i.guid NOT IN ( \
            SELECT distinct scan_instance_id FROM tbl_scan_results WHERE type <> 'ROOT') \
            ORDER BY started DESC"
        try:
            self.dbh.execute(qry)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching scan list: " + e.args[0])

    # History of data from the scan
    def scanResultHistory(self, instanceId):
        qry = "SELECT STRFTIME('%H:%M %w', generated, 'unixepoch') AS hourmin, \
                type, COUNT(*) FROM tbl_scan_results \
                WHERE scan_instance_id = ? GROUP BY hourmin, type"
        qvars = [instanceId]
        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching scan history: " + e.args[0])


    # Get the source IDs, types and data for a set of IDs
    def scanElementSourcesDirect(self, instanceId, elementIdList):
        # the output of this needs to be aligned with scanResultEvent,
        # as other functions call both expecting the same output.
        qry = "SELECT ROUND(c.generated) AS generated, c.data, \
            s.data as 'source_data', \
            c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \
            c.source_event_hash, t.event_descr, t.event_type, s.scan_instance_id, \
            c.false_positive as 'fp', s.false_positive as 'parent_fp' \
            FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \
            WHERE c.scan_instance_id = ? AND c.source_event_hash = s.hash AND \
            s.scan_instance_id = c.scan_instance_id AND \
            t.event = c.type AND c.hash in ("
        qvars = [instanceId]

        for hashId in elementIdList:
            qry = qry + "'" + hashId + "',"
        qry += "'')"

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when getting source element IDs: " + e.args[0])

    # Get the child IDs, types and data for a set of IDs
    def scanElementChildrenDirect(self, instanceId, elementIdList):
        # the output of this needs to be aligned with scanResultEvent,
        # as other functions call both expecting the same output.
        qry = "SELECT ROUND(c.generated) AS generated, c.data, \
            s.data as 'source_data', \
            c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \
            c.source_event_hash, t.event_descr, t.event_type, s.scan_instance_id, \
            c.false_positive as 'fp', s.false_positive as 'parent_fp' \
            FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \
            WHERE c.scan_instance_id = ? AND c.source_event_hash = s.hash AND \
            s.scan_instance_id = c.scan_instance_id AND \
            t.event = c.type AND s.hash in ("
        qvars = [instanceId]

        for hashId in elementIdList:
            qry = qry + "'" + hashId + "',"
        qry += "'')"

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when getting child element IDs: " + e.args[0])

    # Get the full set of upstream IDs which are parents to the 
    # supplied set of IDs.
    # Data has to be in the format of output from scanElementSourcesDirect
    # and produce output in the same format.
    def scanElementSourcesAll(self, instanceId, childData):
        # Get the first round of source IDs for the leafs
        keepGoing = True
        nextIds = list()
        datamap = dict()
        pc = dict()

        for row in childData:
            # these must be unique values!
            parentId = row[9]
            childId = row[8]
            datamap[childId] = row

            if parentId in pc:
                if childId not in pc[parentId]:
                    pc[parentId].append(childId)
            else:
                pc[parentId] = [childId]

            # parents of the leaf set
            if parentId not in nextIds:
                nextIds.append(parentId)

        while keepGoing:
            parentSet = self.scanElementSourcesDirect(instanceId, nextIds)
            nextIds = list()
            keepGoing = False

            for row in parentSet:
                parentId = row[9]
                childId = row[8]
                datamap[childId] = row
                #print childId + " = " + str(row)

                if parentId in pc:
                    if childId not in pc[parentId]:
                        pc[parentId].append(childId)
                else:
                    pc[parentId] = [childId]
                if parentId not in nextIds:
                    nextIds.append(parentId)

                # Prevent us from looping at root
                if parentId != "ROOT":
                    keepGoing = True

        datamap[parentId] = row
        return [datamap, pc]
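
    # Illustrative use of the return value (placeholders only): 'datamap' maps
    # an element hash to its full result row, 'pc' maps a parent hash to its
    # child hashes.
    #
    #   leafs = dbh.scanElementSourcesDirect("myscan-guid", ["some-hash"])
    #   datamap, pc = dbh.scanElementSourcesAll("myscan-guid", leafs)
    #   for parent, children in pc.items():
    #       print(parent + " -> " + str(children))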

    # Get the full set of downstream IDs which are children of the 
    # supplied set of IDs
    # NOTE FOR NOW THE BEHAVIOR IS NOT THE SAME AS THE scanElementParent*
    # FUNCTIONS - THIS ONLY RETURNS IDS!!
    def scanElementChildrenAll(self, instanceId, parentIds):
        datamap = list()
        keepGoing = True
        nextIds = list()

        nextSet = self.scanElementChildrenDirect(instanceId, parentIds)
        for row in nextSet:
            datamap.append(row[8])

        for row in nextSet:
            if row[8] not in nextIds:
                nextIds.append(row[8])

        while keepGoing:
            nextSet = self.scanElementChildrenDirect(instanceId, nextIds)
            if nextSet is None or len(nextSet) == 0:
                keepGoing = False
                break

            # Collect all unique child hashes from this round before
            # descending another level. (Resetting nextIds inside the loop
            # would follow only the last child of each round.)
            nextIds = list()
            for row in nextSet:
                datamap.append(row[8])
                if row[8] not in nextIds:
                    nextIds.append(row[8])

        return datamap
Exemple #45
0
        p.add_argument("-q", action='store_true', help="Disable logging.")
        args = p.parse_args()

        sfConfig['__logstdout'] = True
            
        if args.debug:
            sfConfig['_debug'] = True
        else:
            sfConfig['_debug'] = False

        if args.q:
            sfConfig['__logging'] = False


    sfModules = dict()
    sft = SpiderFoot(sfConfig)
    # Go through each module in the modules directory with a .py extension
    for filename in os.listdir(sft.myPath() + '/modules/'):
        if filename.startswith("sfp_") and filename.endswith(".py"):
            # Skip the module template and debugging modules
            if filename == "sfp_template.py" or filename == 'sfp_stor_print.py':
                continue
            modName = filename.split('.')[0]

            # Load and instantiate the module
            sfModules[modName] = dict()
            mod = __import__('modules.' + modName, globals(), locals(), [modName])
            sfModules[modName]['object'] = getattr(mod, modName)()
            sfModules[modName]['name'] = sfModules[modName]['object'].__doc__.split(":", 5)[0]
            sfModules[modName]['cats'] = sfModules[modName]['object'].__doc__.split(":", 5)[1].split(",")
            sfModules[modName]['group'] = sfModules[modName]['object'].__doc__.split(":", 5)[2]
    def test_setup(self):
        sf = SpiderFoot(self.default_options)
        module = sfp_torexits()
        module.setup(sf, dict())
Exemple #47
0
    '_socks2addr': 'SOCKS Server IP Address.',
    '_socks3port': 'SOCKS Server TCP Port. Usually 1080 for 4/5, 8080 for HTTP and 9050 for TOR.',
    '_socks4user': '******',
    '_socks5pwd': "SOCKS Password. Valid only for SOCKS5 servers.",
    '_socks6dns': "Resolve DNS through the SOCKS proxy? Has no affect when TOR is used: Will always be True.",
    '_torctlport': "The port TOR is taking control commands on. This is necessary for SpiderFoot to tell TOR to re-circuit when it suspects anonymity is compromised.",
    '_modulesenabled': "Modules enabled for the scan."  # This is a hack to get a description for an option not actually available.
}

if __name__ == '__main__':
    if len(sys.argv) > 1:
        (addr, port) = sys.argv[1].split(":")
        sfConfig['__webaddr'] = addr
        sfConfig['__webport'] = int(port)

    sf = SpiderFoot(sfConfig)
    sfModules = dict()

    # Go through each module in the modules directory with a .py extension
    for filename in os.listdir(sf.myPath() + '/modules/'):
        if filename.startswith("sfp_") and filename.endswith(".py"):
            # Skip the module template and debugging modules
            if filename == "sfp_template.py" or filename == 'sfp_stor_print.py':
                continue
            modName = filename.split('.')[0]

            # Load and instantiate the module
            sfModules[modName] = dict()
            mod = __import__('modules.' + modName, globals(), locals(), [modName])
            sfModules[modName]['object'] = getattr(mod, modName)()
            sfModules[modName]['name'] = sfModules[modName]['object'].__doc__.split(":", 2)[0]
Exemple #48
0
    def test_init(self):
        """
        Test __init__(self, options, handle=None):
        """
        sf = SpiderFoot(self.default_options)
        self.assertEqual('TBD', 'TBD')
Exemple #49
0
    def test_setup(self):
        sf = SpiderFoot(self.default_options)
        module = sfp_neutrinoapi()
        module.setup(sf, dict())
    '_socks2addr': 'SOCKS Server IP Address.',
    '_socks3port': 'SOCKS Server TCP Port. Usually 1080 for 4/5, 8080 for HTTP and 9050 for TOR.',
    '_socks4user': '******',
    '_socks5pwd': "SOCKS Password. Valid only for SOCKS5 servers.",
    '_socks6dns': "Resolve DNS through the SOCKS proxy? Has no affect when TOR is used: Will always be True.",
    '_torctlport': "The port TOR is taking control commands on. This is necessary for SpiderFoot to tell TOR to re-circuit when it suspects anonymity is compromised.",
    '_modulesenabled': "Modules enabled for the scan."  # This is a hack to get a description for an option not actually available.
}

if __name__ == '__main__':
    if len(sys.argv) > 1:
        (addr, port) = sys.argv[1].split(":")
        sfConfig['__webaddr'] = addr
        sfConfig['__webport'] = int(port)

    sf = SpiderFoot(sfConfig)
    sfModules = dict()

    # Go through each module in the modules directory with a .py extension
    for filename in os.listdir(sf.myPath() + '/modules/'):
        if filename.startswith("sfp_") and filename.endswith(".py"):
            # Skip the module template and debugging modules
            if filename == "sfp_template.py" or filename == 'sfp_stor_print.py':
                continue
            modName = filename.split('.')[0]

            # Load and instantiate the module
            sfModules[modName] = dict()
            mod = __import__('modules.' + modName, globals(), locals(), [modName])
            sfModules[modName]['object'] = getattr(mod, modName)()
            sfModules[modName]['name'] = sfModules[modName]['object'].__doc__.split(":", 5)[0]
Exemple #51
0
    def test_init_no_options(self):
        """
        Test __init__(self, options, handle=None):
        """
        sf = SpiderFoot(dict())
        self.assertEqual('TBD', 'TBD')
Exemple #52
0
def start_web_server(sfWebUiConfig, sfConfig):
    """Start the web server so you can start looking at results

    Args:
        sfWebUiConfig (dict): web server options
        sfConfig (dict): SpiderFoot config options
    """

    web_host = sfWebUiConfig.get('host', '127.0.0.1')
    web_port = sfWebUiConfig.get('port', 5001)
    web_root = sfWebUiConfig.get('root', '/')

    cherrypy.config.update({
        'log.screen': False,
        'server.socket_host': web_host,
        'server.socket_port': int(web_port)
    })

    log.info(f"Starting web server at {web_host}:{web_port} ...")

    # Disable auto-reloading of content
    cherrypy.engine.autoreload.unsubscribe()

    sf = SpiderFoot(sfConfig)

    # Enable access to static files via the web directory
    conf = {
        '/query': {
            'tools.encode.text_only': False,
            'tools.encode.add_charset': True,
        },
        '/static': {
            'tools.staticdir.on': True,
            'tools.staticdir.dir': 'static',
            'tools.staticdir.root': sf.myPath()
        }
    }

    passwd_file = sf.dataPath() + '/passwd'
    if os.path.isfile(passwd_file):
        if not os.access(passwd_file, os.R_OK):
            log.error("Could not read passwd file. Permission denied.")
            sys.exit(-1)

        secrets = dict()

        with open(passwd_file, 'r') as pw:
            for line in pw.readlines():
                if ':' not in line:
                    log.error(
                        "Incorrect format of passwd file, must be username:password on each line."
                    )
                    sys.exit(-1)

                u = line.strip().split(":")[0]
                p = ':'.join(line.strip().split(":")[1:])

                if not u or not p:
                    log.error(
                        "Incorrect format of passwd file, must be username:password on each line."
                    )
                    sys.exit(-1)

                secrets[u] = p
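
        # Example passwd file contents (illustrative; one username:password
        # pair per line):
        #
        #   admin:s3cretpass
        #   analyst:An0therPass!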

        if secrets:
            log.info("Enabling authentication based on supplied passwd file.")
            conf['/'] = {
                'tools.auth_digest.on':
                True,
                'tools.auth_digest.realm':
                web_host,
                'tools.auth_digest.get_ha1':
                auth_digest.get_ha1_dict_plain(secrets),
                'tools.auth_digest.key':
                random.SystemRandom().randint(0, 99999999)
            }
        else:
            warn_msg = "\n********************************************************************\n"
            warn_msg += "Warning: passwd file contains no passwords. Authentication disabled.\n"
            warn_msg += "********************************************************************\n"
            log.warning(warn_msg)
    else:
        warn_msg = "\n********************************************************************\n"
        warn_msg += "Please consider adding authentication to protect this instance!\n"
        warn_msg += "Refer to https://www.spiderfoot.net/documentation/#security.\n"
        warn_msg += "********************************************************************\n"
        log.warning(warn_msg)

    if web_host == "0.0.0.0":  # nosec
        url = f"http://<IP of this host>:{web_port}{web_root}"
    else:
        url = f"http://{web_host}:{web_port}{web_root}"

    key_path = sf.dataPath() + '/spiderfoot.key'
    crt_path = sf.dataPath() + '/spiderfoot.crt'
    if os.path.isfile(key_path) and os.path.isfile(crt_path):
        if not os.access(crt_path, os.R_OK):
            log.critical(f"Could not read {crt_path} file. Permission denied.")
            sys.exit(-1)

        if not os.access(key_path, os.R_OK):
            log.critical(f"Could not read {key_path} file. Permission denied.")
            sys.exit(-1)

        log.info("Enabling SSL based on supplied key and certificate file.")
        cherrypy.server.ssl_module = 'builtin'
        cherrypy.server.ssl_certificate = crt_path
        cherrypy.server.ssl_private_key = key_path

        url = url.replace("http://", "https://")

    print("")
    print("*************************************************************")
    print(" Use SpiderFoot by starting your web browser of choice and ")
    print(f" browse to {url}")
    print("*************************************************************")
    print("")

    cherrypy.quickstart(SpiderFootWebUi(sfWebUiConfig, sfConfig),
                        script_name=web_root,
                        config=conf)
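
# A minimal usage sketch (values are illustrative, not from the original
# source): start the web UI on localhost port 5001 under the root path.
#
#   start_web_server({'host': '127.0.0.1', 'port': 5001, 'root': '/'}, sfConfig)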
Exemple #53
0
def main():
    if len(sys.argv) <= 1:
        print(
            "SpiderFoot requires -l <ip>:<port> to start the web server. Try --help for guidance."
        )
        sys.exit(-1)

    # web server config
    sfWebUiConfig = {'host': '127.0.0.1', 'port': 5001, 'root': '/'}

    # 'Global' configuration options
    # These can be overridden on a per-module basis, and some will
    # be overridden from saved configuration settings stored in the DB.
    sfConfig = {
        '_debug': False,  # Debug
        '__logging': True,  # Logging in general
        '__outputfilter': None,  # Event types to filter from modules' output
        '_useragent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:62.0) Gecko/20100101 Firefox/62.0',  # User-Agent to use for HTTP requests
        '_dnsserver': '',  # Override the default resolver
        '_fetchtimeout': 5,  # number of seconds before giving up on a fetch
        '_internettlds':
        'https://publicsuffix.org/list/effective_tld_names.dat',
        '_internettlds_cache': 72,
        '_genericusers':
        "abuse,admin,billing,compliance,devnull,dns,ftp,hostmaster,inoc,ispfeedback,ispsupport,list-request,list,maildaemon,marketing,noc,no-reply,noreply,null,peering,peering-notify,peering-request,phish,phishing,postmaster,privacy,registrar,registry,root,routing-registry,rr,sales,security,spam,support,sysadmin,tech,undisclosed-recipients,unsubscribe,usenet,uucp,webmaster,www",
        '__version__': '3.3-DEV',
        '__database': 'spiderfoot.db',
        '__modules__': None,  # List of modules. Will be set after start-up.
        '_socks1type': '',
        '_socks2addr': '',
        '_socks3port': '',
        '_socks4user': '',
        '_socks5pwd': '',
        '_torctlport': 9051
    }

    sfOptdescs = {
        '_debug': "Enable debugging?",
        '_useragent':
        "User-Agent string to use for HTTP requests. Prefix with an '@' to randomly select the User Agent from a file containing user agent strings for each request, e.g. @C:\\useragents.txt or @/home/bob/useragents.txt. Or supply a URL to load the list from there.",
        '_dnsserver':
        "Override the default resolver with another DNS server. For example, 8.8.8.8 is Google's open DNS server.",
        '_fetchtimeout':
        "Number of seconds before giving up on a HTTP request.",
        '_internettlds': "List of Internet TLDs.",
        '_internettlds_cache':
        "Hours to cache the Internet TLD list. This can safely be quite a long time given that the list doesn't change too often.",
        '_genericusers':
        "List of usernames that if found as usernames or as part of e-mail addresses, should be treated differently to non-generics.",
        '_socks1type': "SOCKS Server Type. Can be '4', '5', 'HTTP' or 'TOR'",
        '_socks2addr': 'SOCKS Server IP Address.',
        '_socks3port':
        'SOCKS Server TCP Port. Usually 1080 for 4/5, 8080 for HTTP and 9050 for TOR.',
        '_socks4user':
        '******',
        '_socks5pwd': "SOCKS Password. Valid only for SOCKS5 servers.",
        '_torctlport':
        "The port TOR is taking control commands on. This is necessary for SpiderFoot to tell TOR to re-circuit when it suspects anonymity is compromised.",
        '_modulesenabled':
        "Modules enabled for the scan."  # This is a hack to get a description for an option not actually available.
    }

    # Legacy way to run the server
    args = None
    p = argparse.ArgumentParser(
        description='SpiderFoot 3.3-DEV: Open Source Intelligence Automation.')
    p.add_argument("-d",
                   "--debug",
                   action='store_true',
                   help="Enable debug output.")
    p.add_argument("-l", metavar="IP:port", help="IP and port to listen on.")
    p.add_argument("-m",
                   metavar="mod1,mod2,...",
                   type=str,
                   help="Modules to enable.")
    p.add_argument("-M",
                   "--modules",
                   action='store_true',
                   help="List available modules.")
    p.add_argument("-s", metavar="TARGET", help="Target for the scan.")
    p.add_argument(
        "-t",
        metavar="type1,type2,...",
        type=str,
        help="Event types to collect (modules selected automatically).")
    p.add_argument("-T",
                   "--types",
                   action='store_true',
                   help="List available event types.")
    p.add_argument(
        "-o",
        metavar="tab|csv|json",
        type=str,
        help="Output format. Tab is default. If using json, -q is enforced.")
    p.add_argument("-H",
                   action='store_true',
                   help="Don't print field headers, just data.")
    p.add_argument("-n", action='store_true', help="Strip newlines from data.")
    p.add_argument("-r",
                   action='store_true',
                   help="Include the source data field in tab/csv output.")
    p.add_argument(
        "-S",
        metavar="LENGTH",
        type=int,
        help="Maximum data length to display. By default, all data is shown.")
    p.add_argument("-D",
                   metavar='DELIMITER',
                   type=str,
                   help="Delimiter to use for CSV output. Default is ,.")
    p.add_argument(
        "-f",
        action='store_true',
        help="Filter out other event types that weren't requested with -t.")
    p.add_argument("-F",
                   metavar="type1,type2,...",
                   type=str,
                   help="Show only a set of event types, comma-separated.")
    p.add_argument(
        "-x",
        action='store_true',
        help=
        "STRICT MODE. Will only enable modules that can directly consume your target, and if -t was specified only those events will be consumed by modules. This overrides -t and -m options."
    )
    p.add_argument("-q",
                   action='store_true',
                   help="Disable logging. This will also hide errors!")
    args = p.parse_args()

    if args.debug:
        sfConfig['_debug'] = True
        log.setLevel(logging.DEBUG)
    else:
        log.setLevel(logging.INFO)
        sfConfig['_debug'] = False

    if args.q or args.o == "json":
        log.setLevel(logging.NOTSET)
        sfConfig['__logging'] = False

    sfModules = dict()
    sft = SpiderFoot(sfConfig)

    # Load each module in the modules directory with a .py extension
    mod_dir = sft.myPath() + '/modules/'

    if not os.path.isdir(mod_dir):
        log.critical(f"Modules directory does not exist: {mod_dir}")
        sys.exit(-1)

    for filename in os.listdir(mod_dir):
        if filename.startswith("sfp_") and filename.endswith(".py"):
            # Skip the module template and debugging modules
            if filename in ('sfp_template.py', 'sfp_stor_print.py'):
                continue
            modName = filename.split('.')[0]

            # Load and instantiate the module
            sfModules[modName] = dict()
            mod = __import__('modules.' + modName, globals(), locals(),
                             [modName])
            sfModules[modName]['object'] = getattr(mod, modName)()
            try:
                sfModules[modName]['name'] = sfModules[modName]['object'].meta[
                    'name']
                sfModules[modName]['cats'] = sfModules[modName][
                    'object'].meta.get('categories', list())
                sfModules[modName]['group'] = sfModules[modName][
                    'object'].meta.get('useCases', list())
                if len(sfModules[modName]['cats']) > 1:
                    raise ValueError(
                        f"Module {modName} has multiple categories defined but only one is supported."
                    )
                sfModules[modName]['labels'] = sfModules[modName][
                    'object'].meta.get('flags', list())
                sfModules[modName]['descr'] = sfModules[modName][
                    'object'].meta['summary']
                sfModules[modName]['provides'] = sfModules[modName][
                    'object'].producedEvents()
                sfModules[modName]['consumes'] = sfModules[modName][
                    'object'].watchedEvents()
                sfModules[modName]['meta'] = sfModules[modName]['object'].meta
                if hasattr(sfModules[modName]['object'], 'opts'):
                    sfModules[modName]['opts'] = sfModules[modName][
                        'object'].opts
                if hasattr(sfModules[modName]['object'], 'optdescs'):
                    sfModules[modName]['optdescs'] = sfModules[modName][
                        'object'].optdescs
            except BaseException as e:
                log.critical(f"Failed to load {modName}: {e}")
                sys.exit(-1)

    if not sfModules:
        log.critical(f"No modules found in modules directory: {mod_dir}")
        sys.exit(-1)

    # Add module info to sfConfig so it can be used by the UI
    sfConfig['__modules__'] = sfModules
    # Add descriptions of the global config options
    sfConfig['__globaloptdescs__'] = sfOptdescs

    if args.l:
        try:
            (host, port) = args.l.split(":")
        except BaseException:
            log.critical("Invalid ip:port format.")
            sys.exit(-1)

        sfWebUiConfig['host'] = host
        sfWebUiConfig['port'] = port

        start_web_server(sfWebUiConfig, sfConfig)
    else:
        start_scan(sfConfig, sfModules, args)
Exemple #54
0
def start_scan(sfConfig, sfModules, args):
    global dbh
    global scanId

    dbh = SpiderFootDb(sfConfig, init=True)
    sf = SpiderFoot(sfConfig)

    if args.modules:
        log.info("Modules available:")
        for m in sorted(sfModules.keys()):
            if "__" in m:
                continue
            print(('{0:25}  {1}'.format(m, sfModules[m]['descr'])))
        sys.exit(0)

    if args.types:
        log.info("Types available:")
        typedata = dbh.eventTypes()
        types = dict()
        for r in typedata:
            types[r[1]] = r[0]

        for t in sorted(types.keys()):
            print(('{0:45}  {1}'.format(t, types[t])))
        sys.exit(0)

    if not args.s:
        log.error(
            "You must specify a target when running in scan mode. Try --help for guidance."
        )
        sys.exit(-1)

    if args.x and not args.t:
        log.error("-x can only be used with -t. Use --help for guidance.")
        sys.exit(-1)

    if args.x and args.m:
        log.error(
            "-x can only be used with -t and not with -m. Use --help for guidance."
        )
        sys.exit(-1)

    if args.r and (args.o and args.o not in ["tab", "csv"]):
        log.error("-r can only be used when your output format is tab or csv.")
        sys.exit(-1)

    if args.H and (args.o and args.o not in ["tab", "csv"]):
        log.error("-H can only be used when your output format is tab or csv.")
        sys.exit(-1)

    if args.D and args.o != "csv":
        log.error("-D can only be used when using the csv output format.")
        sys.exit(-1)

    target = args.s
    # Usernames and names - quoted on the commandline - won't have quotes,
    # so add them.
    if " " in target:
        target = f"\"{target}\""
    if "." not in target and not target.startswith("+") and '"' not in target:
        target = f"\"{target}\""
    targetType = sf.targetType(target)

    if not targetType:
        log.error(f"Could not determine target type. Invalid target: {target}")
        sys.exit(-1)

    target = target.strip('"')

    modlist = list()
    if not args.t and not args.m:
        log.warning(
            "You didn't specify any modules or types, so all will be enabled.")
        for m in list(sfModules.keys()):
            if "__" in m:
                continue
            modlist.append(m)

    signal.signal(signal.SIGINT, handle_abort)
    # If the user is scanning by type..
    # 1. Find modules producing that type
    if args.t:
        types = args.t
        modlist = sf.modulesProducing(types)
        newmods = deepcopy(modlist)
        newmodcpy = deepcopy(newmods)

        # 2. For each type those modules consume, get modules producing
        while len(newmodcpy) > 0:
            for etype in sf.eventsToModules(newmodcpy):
                xmods = sf.modulesProducing([etype])
                for mod in xmods:
                    if mod not in modlist:
                        modlist.append(mod)
                        newmods.append(mod)
            newmodcpy = deepcopy(newmods)
            newmods = list()
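
        # Illustrative expansion (the types are examples only): requesting
        # EMAILADDR enables modules that produce EMAILADDR; whatever event
        # types those modules consume are then resolved to further producer
        # modules, repeating until no new modules are added.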

    # Easier if scanning by module
    if args.m:
        modlist = list(filter(None, args.m.split(",")))

    # Add sfp__stor_stdout to the module list
    typedata = dbh.eventTypes()
    types = dict()
    for r in typedata:
        types[r[1]] = r[0]

    sfp__stor_stdout_opts = sfConfig['__modules__']['sfp__stor_stdout']['opts']
    sfp__stor_stdout_opts['_eventtypes'] = types
    if args.f:
        if args.f and not args.t:
            log.error("You can only use -f with -t. Use --help for guidance.")
            sys.exit(-1)
        sfp__stor_stdout_opts['_showonlyrequested'] = True
    if args.F:
        sfp__stor_stdout_opts['_requested'] = args.F.split(",")
        sfp__stor_stdout_opts['_showonlyrequested'] = True
    if args.o:
        sfp__stor_stdout_opts['_format'] = args.o
    if args.t:
        sfp__stor_stdout_opts['_requested'] = args.t.split(",")
    if args.n:
        sfp__stor_stdout_opts['_stripnewline'] = True
    if args.r:
        sfp__stor_stdout_opts['_showsource'] = True
    if args.S:
        sfp__stor_stdout_opts['_maxlength'] = args.S
    if args.D:
        sfp__stor_stdout_opts['_csvdelim'] = args.D
    if args.x:
        tmodlist = list()
        modlist = list()
        xmods = sf.modulesConsuming([targetType])
        for mod in xmods:
            if mod not in modlist:
                tmodlist.append(mod)

        # Remove any modules not producing the type requested
        rtypes = args.t.split(",")
        for mod in tmodlist:
            for r in rtypes:
                if not sfModules[mod]['provides']:
                    continue
                if r in sfModules[mod].get('provides',
                                           []) and mod not in modlist:
                    modlist.append(mod)

    if len(modlist) == 0:
        log.error("Based on your criteria, no modules were enabled.")
        sys.exit(-1)

    modlist += ["sfp__stor_db", "sfp__stor_stdout"]

    # Run the scan
    if sfConfig['__logging']:
        log.info(f"Modules enabled ({len(modlist)}): {','.join(modlist)}")
    cfg = sf.configUnserialize(dbh.configGet(), sfConfig)

    # Debug mode is a variable that gets stored to the DB, so re-apply it
    if args.debug:
        cfg['_debug'] = True
    else:
        cfg['_debug'] = False

    # If strict mode is enabled, filter the output from modules.
    if args.x and args.t:
        cfg['__outputfilter'] = args.t.split(",")

    # Prepare scan output headers
    if args.o == "json":
        print("[", end='')
    elif not args.H:
        delim = "\t"

        if args.o == "tab":
            delim = "\t"

        if args.o == "csv":
            if args.D:
                delim = args.D
            else:
                delim = ","

        if args.r:
            if delim == "\t":
                headers = '{0:30}{1}{2:45}{3}{4}{5}{6}'.format(
                    "Source", delim, "Type", delim, "Source Data", delim,
                    "Data")
            else:
                headers = delim.join(["Source", "Type", "Source Data", "Data"])
        else:
            if delim == "\t":
                headers = '{0:30}{1}{2:45}{3}{4}'.format(
                    "Source", delim, "Type", delim, "Data")
            else:
                headers = delim.join(["Source", "Type", "Data"])

        print(headers)

    # Start running a new scan
    scanName = target
    scanId = sf.genScanInstanceId()
    try:
        p = mp.Process(target=SpiderFootScanner,
                       args=(scanName, scanId, target, targetType, modlist,
                             cfg))
        p.daemon = True
        p.start()
    except BaseException as e:
        log.error(f"Scan [{scanId}] failed: {e}")
        sys.exit(-1)

    # Poll for scan status until completion
    while True:
        time.sleep(1)
        info = dbh.scanInstanceGet(scanId)
        if not info:
            continue
        if info[5] in [
                "ERROR-FAILED", "ABORT-REQUESTED", "ABORTED", "FINISHED"
        ]:
            if sfConfig['__logging']:
                log.info(f"Scan completed with status {info[5]}")
            if args.o == "json":
                print("]")
            sys.exit(0)

    return
Exemple #55
0
    def startscan(self, scanname, scantarget, modulelist, typelist, usecase):
        global globalScanStatus

        # Snapshot the current configuration to be used by the scan
        cfg = deepcopy(self.config)
        modopts = dict()  # Not used yet as module options are set globally
        modlist = list()
        sf = SpiderFoot(cfg)
        dbh = SpiderFootDb(cfg)
        types = dbh.eventTypes()
        targetType = None
        [scanname, scantarget] = self.cleanUserInput([scanname, scantarget])

        if scanname == "" or scantarget == "":
            return self.error("Form incomplete.")

        if typelist == "" and modulelist == "" and usecase == "":
            return self.error("Form incomplete.")

        # User selected modules
        if modulelist != "":
            modlist = modulelist.replace('module_', '').split(',')

        # User selected types
        if len(modlist) == 0 and typelist != "":
            typesx = typelist.replace('type_', '').split(',')
            # 1. Find all modules that produce the requested types
            modlist = sf.modulesProducing(typesx)
            newmods = deepcopy(modlist)
            newmodcpy = deepcopy(newmods)
            # 2. For each type those modules consume, get modules producing
            while len(newmodcpy) > 0:
                for etype in sf.eventsToModules(newmodcpy):
                    xmods = sf.modulesProducing([etype])
                    for mod in xmods:
                        if mod not in modlist:
                            modlist.append(mod)
                            newmods.append(mod)
                newmodcpy = deepcopy(newmods)
                newmods = list()

        # User selected a use case
        if len(modlist) == 0 and usecase != "":
            for mod in self.config['__modules__']:
                if usecase == 'all' or usecase in self.config['__modules__'][
                        mod]['cats']:
                    modlist.append(mod)

        # Add our mandatory storage module..
        if "sfp__stor_db" not in modlist:
            modlist.append("sfp__stor_db")
        modlist.sort()

        targetType = sf.targetType(scantarget)
        if targetType is None:
            return self.error("Invalid target type. Could not recognize it as " + \
                              "an IP address, IP subnet, domain name or host name.")

        # Start running a new scan
        scanId = sf.genScanInstanceGUID(scanname)
        t = SpiderFootScanner(scanname, scantarget.lower(), targetType, scanId,
                              modlist, cfg, modopts)
        t.start()

        # Wait until the scan has initialized
        while globalScanStatus.getStatus(scanId) is None:
            print "[info] Waiting for the scan to initialize..."
            time.sleep(1)

        templ = Template(filename='dyn/scaninfo.tmpl', lookup=self.lookup)
        return templ.render(id=scanId,
                            name=scanname,
                            docroot=self.docroot,
                            status=globalScanStatus.getStatus(scanId),
                            pageid="SCANLIST")
 def test_init(self):
     """
     Test __init__(self, options, handle=None):
     """
     sf = SpiderFoot(self.default_options)
     self.assertIsInstance(sf, SpiderFoot)
 def test_init_no_options(self):
     """
     Test __init__(self, options, handle=None):
     """
     sf = SpiderFoot(dict())
     self.assertIsInstance(sf, SpiderFoot)
Exemple #58
-1
class SpiderFootDb:
    sf = None
    dbh = None
    conn = None

    # Queries for creating the SpiderFoot database
    createQueries = [
        "PRAGMA journal_mode=WAL",
        "CREATE TABLE tbl_event_types ( \
            event       VARCHAR NOT NULL PRIMARY KEY, \
            event_descr VARCHAR NOT NULL, \
            event_raw   INT NOT NULL DEFAULT 0, \
            event_type  VARCHAR NOT NULL \
        )",
        "CREATE TABLE tbl_config ( \
            scope   VARCHAR NOT NULL, \
            opt     VARCHAR NOT NULL, \
            val     VARCHAR NOT NULL, \
            PRIMARY KEY (scope, opt) \
        )",
        "CREATE TABLE tbl_scan_instance ( \
            guid        VARCHAR NOT NULL PRIMARY KEY, \
            name        VARCHAR NOT NULL, \
            seed_target VARCHAR NOT NULL, \
            created     INT DEFAULT 0, \
            started     INT DEFAULT 0, \
            ended       INT DEFAULT 0, \
            status      VARCHAR NOT NULL \
        )",
        "CREATE TABLE tbl_scan_log ( \
            scan_instance_id    VARCHAR NOT NULL REFERENCES tbl_scan_instance(guid), \
            generated           INT NOT NULL, \
            component           VARCHAR, \
            type                VARCHAR NOT NULL, \
            message             VARCHAR \
        )",
        "CREATE TABLE tbl_scan_config ( \
            scan_instance_id    VARCHAR NOT NULL REFERENCES tbl_scan_instance(guid), \
            component           VARCHAR NOT NULL, \
            opt                 VARCHAR NOT NULL, \
            val                 VARCHAR NOT NULL \
        )",
        "CREATE TABLE tbl_scan_results ( \
            scan_instance_id    VARCHAR NOT NULL REFERENCES tbl_scan_instance(guid), \
            hash                VARCHAR NOT NULL, \
            type                VARCHAR NOT NULL REFERENCES tbl_event_types(event), \
            generated           INT NOT NULL, \
            confidence          INT NOT NULL DEFAULT 100, \
            visibility          INT NOT NULL DEFAULT 100, \
            risk                INT NOT NULL DEFAULT 0, \
            module              VARCHAR NOT NULL, \
            data                VARCHAR, \
            false_positive      INT NOT NULL DEFAULT 0, \
            source_event_hash  VARCHAR DEFAULT 'ROOT' \
        )",
        "CREATE INDEX idx_scan_results_id ON tbl_scan_results (scan_instance_id)",
        "CREATE INDEX idx_scan_results_type ON tbl_scan_results (scan_instance_id, type)",
        "CREATE INDEX idx_scan_results_hash ON tbl_scan_results (scan_instance_id, hash)",
        "CREATE INDEX idx_scan_results_srchash ON tbl_scan_results (scan_instance_id, source_event_hash)",
        "CREATE INDEX idx_scan_logs ON tbl_scan_log (scan_instance_id)",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('ROOT', 'Internal SpiderFoot Root event', 1, 'INTERNAL')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('ACCOUNT_EXTERNAL_OWNED', 'Account on External Site', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('ACCOUNT_EXTERNAL_OWNED_COMPROMISED', 'Hacked Account on External Site', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('ACCOUNT_EXTERNAL_USER_SHARED', 'User Account on External Site', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('ACCOUNT_EXTERNAL_USER_SHARED_COMPROMISED', 'Hacked User Account on External Site', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AFFILIATE_INTERNET_NAME', 'Affiliate - Internet Name', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AFFILIATE_IPADDR', 'Affiliate - IP Address', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AFFILIATE_WEB_CONTENT', 'Affiliate - Web Content', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AFFILIATE_DESCRIPTION_CATEGORY', 'Affiliate Description - Category', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AFFILIATE_DESCRIPTION_ABSTRACT', 'Affiliate Description - Abstract', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('APPSTORE_ENTRY', 'App Store Entry', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AMAZON_S3_BUCKET', 'Amazon S3 Bucket', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BASE64_DATA', 'Base64-encoded Data', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BITCOIN_ADDRESS', 'Bitcoin Address', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BITCOIN_BALANCE', 'Bitcoin Balance', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BGP_AS_OWNER', 'BGP AS Ownership', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BGP_AS_MEMBER', 'BGP AS Membership', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BGP_AS_PEER', 'BGP AS Peer', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BLACKLISTED_IPADDR', 'Blacklisted IP Address', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BLACKLISTED_AFFILIATE_IPADDR', 'Blacklisted Affiliate IP Address', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BLACKLISTED_SUBNET', 'Blacklisted IP on Same Subnet', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BLACKLISTED_NETBLOCK', 'Blacklisted IP on Owned Netblock', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('CO_HOSTED_SITE', 'Co-Hosted Site', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DARKNET_MENTION_URL', 'Darknet Mention URL', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DARKNET_MENTION_CONTENT', 'Darknet Mention Web Content', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DEFACED_INTERNET_NAME', 'Defaced', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DEFACED_IPADDR', 'Defaced IP Address', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DEFACED_AFFILIATE_INTERNET_NAME', 'Defaced Affiliate', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DEFACED_COHOST', 'Defaced Co-Hosted Site', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DEFACED_AFFILIATE_IPADDR', 'Defaced Affiliate IP Address', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DESCRIPTION_CATEGORY', 'Description - Category', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DESCRIPTION_ABSTRACT', 'Description - Abstract', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DEVICE_TYPE', 'Device Type', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DNS_TEXT', 'DNS TXT Record', 0, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DNS_SPF', 'DNS SPF Record', 0, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DOMAIN_NAME', 'Domain Name', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DOMAIN_NAME_PARENT', 'Domain Name (Parent)', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DOMAIN_REGISTRAR', 'Domain Registrar', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DOMAIN_WHOIS', 'Domain Whois', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('EMAILADDR', 'Email Address', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('EMAILADDR_COMPROMISED', 'Hacked Email Address', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('ERROR_MESSAGE', 'Error Message', 0, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('GEOINFO', 'Physical Location', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('HTTP_CODE', 'HTTP Status Code', 0, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('HUMAN_NAME', 'Human Name', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('INTERESTING_FILE', 'Interesting File', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('INTERESTING_FILE_HISTORIC', 'Historic Interesting File', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('JUNK_FILE', 'Junk File', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('INTERNET_NAME', 'Internet Name', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('IP_ADDRESS', 'IP Address', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('IPV6_ADDRESS', 'IPv6 Address', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('LINKED_URL_INTERNAL', 'Linked URL - Internal', 0, 'SUBENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('LINKED_URL_EXTERNAL', 'Linked URL - External', 0, 'SUBENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_ASN', 'Malicious AS', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_IPADDR', 'Malicious IP Address', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_COHOST', 'Malicious Co-Hosted Site', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_EMAILADDR', 'Malicious E-mail Address', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_INTERNET_NAME', 'Malicious Internet Name', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_AFFILIATE_INTERNET_NAME', 'Malicious Affiliate', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_AFFILIATE_IPADDR', 'Malicious Affiliate IP Address', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_NETBLOCK', 'Malicious IP on Owned Netblock', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_SUBNET', 'Malicious IP on Same Subnet', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('NETBLOCK_OWNER', 'Netblock Ownership', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('NETBLOCK_MEMBER', 'Netblock Membership', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('NETBLOCK_WHOIS', 'Netblock Whois', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('OPERATING_SYSTEM', 'Operating System', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('LEAKSITE_URL', 'Leak Site URL', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('LEAKSITE_CONTENT', 'Leak Site Content', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PHONE_NUMBER', 'Phone Number', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PHYSICAL_ADDRESS', 'Physical Address', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PHYSICAL_COORDINATES', 'Physical Coordinates', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PGP_KEY', 'PGP Public Key', 0, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PROVIDER_DNS', 'Name Server (DNS ''NS'' Records)', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PROVIDER_JAVASCRIPT', 'Externally Hosted Javascript', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PROVIDER_MAIL', 'Email Gateway (DNS ''MX'' Records)', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PROVIDER_HOSTING', 'Hosting Provider', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PUBLIC_CODE_REPO', 'Public Code Repository', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('RAW_RIR_DATA', 'Raw Data from RIRs', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('RAW_DNS_RECORDS', 'Raw DNS Records', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('RAW_FILE_META_DATA', 'Raw File Meta Data', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SEARCH_ENGINE_WEB_CONTENT', 'Search Engine''s Web Content', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SOCIAL_MEDIA', 'Social Media Presence', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SIMILARDOMAIN', 'Similar Domain', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SOFTWARE_USED', 'Software Used', 0, 'SUBENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_RAW', 'SSL Certificate - Raw Data', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_ISSUED', 'SSL Certificate - Issued to', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_ISSUER', 'SSL Certificate - Issued by', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_MISMATCH', 'SSL Certificate Host Mismatch', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_EXPIRED', 'SSL Certificate Expired', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_EXPIRING', 'SSL Certificate Expiring', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('TARGET_WEB_CONTENT', 'Web Content', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('TARGET_WEB_COOKIE', 'Cookies', 0, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('TCP_PORT_OPEN', 'Open TCP Port', 0, 'SUBENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('TCP_PORT_OPEN_BANNER', 'Open TCP Port Banner', 0, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('UDP_PORT_OPEN', 'Open UDP Port', 0, 'SUBENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('UDP_PORT_OPEN_INFO', 'Open UDP Port Information', 0, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_ADBLOCKED_EXTERNAL', 'URL (AdBlocked External)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_ADBLOCKED_INTERNAL', 'URL (AdBlocked Internal)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_FORM', 'URL (Form)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_FLASH', 'URL (Uses Flash)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_JAVASCRIPT', 'URL (Uses Javascript)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_WEB_FRAMEWORK', 'URL (Uses a Web Framework)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_JAVA_APPLET', 'URL (Uses Java Applet)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_STATIC', 'URL (Purely Static)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_PASSWORD', 'URL (Accepts Passwords)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_UPLOAD', 'URL (Accepts Uploads)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_FORM_HISTORIC', 'Historic URL (Form)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_FLASH_HISTORIC', 'Historic URL (Uses Flash)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_JAVASCRIPT_HISTORIC', 'Historic URL (Uses Javascript)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_WEB_FRAMEWORK_HISTORIC', 'Historic URL (Uses a Web Framework)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_JAVA_APPLET_HISTORIC', 'Historic URL (Uses Java Applet)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_STATIC_HISTORIC', 'Historic URL (Purely Static)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_PASSWORD_HISTORIC', 'Historic URL (Accepts Passwords)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_UPLOAD_HISTORIC', 'Historic URL (Accepts Uploads)', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('USERNAME', 'Username', 0, 'ENTITY')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('VULNERABILITY', 'Vulnerability in Public Domain', 0, 'DESCRIPTOR')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('WEBSERVER_BANNER', 'Web Server', 0, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('WEBSERVER_HTTPHEADERS', 'HTTP Headers', 1, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('WEBSERVER_STRANGEHEADER', 'Non-Standard HTTP Header', 0, 'DATA')",
        "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('WEBSERVER_TECHNOLOGY', 'Web Technology', 0, 'DESCRIPTOR')"
    ]

    def __init__(self, opts):
        self.sf = SpiderFoot(opts)

        # connect() will create the database file if it doesn't exist, but
        # at least we can use this opportunity to ensure we have permissions to
        # read and write to such a file.
        dbh = sqlite3.connect(self.sf.myPath() + "/" + opts['__database'], timeout=10)
        if dbh is None:
            self.sf.fatal("Could not connect to internal database, and couldn't create " + opts['__database'])
        dbh.text_factory = str

        self.conn = dbh
        self.dbh = dbh.cursor()

        # Now we actually check to ensure the database file has the schema set
        # up correctly.
        try:
            self.dbh.execute('SELECT COUNT(*) FROM tbl_scan_config')
            self.conn.create_function("REGEXP", 2, __dbregex__)
        except sqlite3.Error:
            # .. If not set up, we set it up.
            try:
                self.create()
            except BaseException as e:
                self.sf.error("Tried to set up the SpiderFoot database schema, but failed: " + e.args[0])
        return

    #
    # Back-end database operations
    #

    # Create the back-end schema
    def create(self):
        try:
            for qry in self.createQueries:
                self.dbh.execute(qry)
            self.conn.commit()
        except sqlite3.Error as e:
            raise BaseException("SQL error encountered when setting up database: " + e.args[0])

    # Close the database handle
    def close(self):
        self.dbh.close()

    # Search results
    # criteria is search criteria such as:
    #  - scan_id (search within a scan, if omitted search all)
    #  - type (search a specific type, if omitted search all)
    #  - value (search values for a specific string, if omitted search all)
    #  - regex (search values for a regular expression)
    # ** at least two criteria must be set **
    def search(self, criteria, filterFp=False):
        if criteria.values().count(None) == 3:
            return False

        qvars = list()
        qry = "SELECT ROUND(c.generated) AS generated, c.data, \
            s.data as 'source_data', \
            c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \
            c.source_event_hash, t.event_descr, t.event_type, c.scan_instance_id, \
            c.false_positive as 'fp', s.false_positive as 'parent_fp' \
            FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \
            WHERE s.scan_instance_id = c.scan_instance_id AND \
            t.event = c.type AND c.source_event_hash = s.hash "

        if filterFp:
            qry += " AND c.false_positive <> 1 "

        if criteria.get('scan_id') is not None:
            qry += "AND c.scan_instance_id = ? "
            qvars.append(criteria['scan_id'])

        if criteria.get('type') is not None:
            qry += " AND c.type = ? "
            qvars.append(criteria['type'])

        if criteria.get('value') is not None:
            qry += " AND (c.data LIKE ? OR s.data LIKE ?) "
            qvars.append(criteria['value'])
            qvars.append(criteria['value'])

        if criteria.get('regex') is not None:
            qry += " AND (c.data REGEXP ? OR s.data REGEXP ?) "
            qvars.append(criteria['regex'])
            qvars.append(criteria['regex'])

        qry += " ORDER BY c.data"

        try:
            #print qry
            #print str(qvars)
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching search results: " + e.args[0])

    # Get event types
    def eventTypes(self):
        qry = "SELECT event_descr, event, event_raw, event_type FROM tbl_event_types"
        try:
            self.dbh.execute(qry)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when retreiving event types:" + e.args[0])

    # Log an event to the database
    def scanLogEvent(self, instanceId, classification, message, component=None):
        if component is None:
            component = "SpiderFoot"

        qry = "INSERT INTO tbl_scan_log \
            (scan_instance_id, generated, component, type, message) \
            VALUES (?, ?, ?, ?, ?)"
        try:
            self.dbh.execute(qry, (
                instanceId, time.time() * 1000, component, classification, message
            ))
            self.conn.commit()
        except sqlite3.Error as e:
            if "locked" in e.args[0]:
                # TODO: Do something smarter here to handle locked databases
                self.sf.fatal("Unable to log event in DB due to lock: " + e.args[0])
            else:
                self.sf.fatal("Unable to log event in DB: " + e.args[0])

        return True

    # Store a scan instance
    def scanInstanceCreate(self, instanceId, scanName, scanTarget):
        qry = "INSERT INTO tbl_scan_instance \
            (guid, name, seed_target, created, status) \
            VALUES (?, ?, ?, ?, ?)"
        try:
            self.dbh.execute(qry, (
                instanceId, scanName, scanTarget, time.time() * 1000, 'CREATED'
            ))
            self.conn.commit()
        except sqlite3.Error as e:
            self.sf.fatal("Unable to create instance in DB: " + e.args[0])

        return True

    # Update the start time, end time or status (or all 3) of a scan instance
    def scanInstanceSet(self, instanceId, started=None, ended=None, status=None):
        qvars = list()
        qry = "UPDATE tbl_scan_instance SET "

        if started is not None:
            qry += " started = ?,"
            qvars.append(started)

        if ended is not None:
            qry += " ended = ?,"
            qvars.append(ended)

        if status is not None:
            qry += " status = ?,"
            qvars.append(status)

        # guid = guid is a little hack to avoid messing with comma placement above
        qry += " guid = guid WHERE guid = ?"
        qvars.append(instanceId)

        try:
            self.dbh.execute(qry, qvars)
            self.conn.commit()
        except sqlite3.Error:
            self.sf.fatal("Unable to set information for the scan instance.")

    # Return info about a scan instance (name, target, created, started,
    # ended, status) - don't need this yet - untested
    def scanInstanceGet(self, instanceId):
        qry = "SELECT name, seed_target, ROUND(created/1000) AS created, \
            ROUND(started/1000) AS started, ROUND(ended/1000) AS ended, status \
            FROM tbl_scan_instance WHERE guid = ?"
        qvars = [instanceId]
        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchone()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when retreiving scan instance:" + e.args[0])

    # Obtain a summary of the results per event type
    def scanResultSummary(self, instanceId, by="type"):
        if by == "type":
            qry = "SELECT r.type, e.event_descr, MAX(ROUND(generated)) AS last_in, \
                count(*) AS total, count(DISTINCT r.data) as utotal FROM \
                tbl_scan_results r, tbl_event_types e WHERE e.event = r.type \
                AND r.scan_instance_id = ? GROUP BY r.type ORDER BY e.event_descr"

        if by == "module":
            qry = "SELECT r.module, '', MAX(ROUND(generated)) AS last_in, \
                count(*) AS total, count(DISTINCT r.data) as utotal FROM \
                tbl_scan_results r, tbl_event_types e WHERE e.event = r.type \
                AND r.scan_instance_id = ? GROUP BY r.module ORDER BY r.module DESC"

        if by == "entity":
            qry = "SELECT r.data, e.event_descr, MAX(ROUND(generated)) AS last_in, \
                count(*) AS total, count(DISTINCT r.data) as utotal FROM \
                tbl_scan_results r, tbl_event_types e WHERE e.event = r.type \
                AND r.scan_instance_id = ? \
                AND e.event_type in ('ENTITY') \
                GROUP BY r.data, e.event_descr ORDER BY total DESC limit 50"

        qvars = [instanceId]
        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching result summary: " + e.args[0])

    # Obtain the data for a scan and event type
    def scanResultEvent(self, instanceId, eventType='ALL', filterFp=False):
        qry = "SELECT ROUND(c.generated) AS generated, c.data, \
            s.data as 'source_data', \
            c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \
            c.source_event_hash, t.event_descr, t.event_type, s.scan_instance_id, \
            c.false_positive as 'fp', s.false_positive as 'parent_fp' \
            FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \
            WHERE c.scan_instance_id = ? AND c.source_event_hash = s.hash AND \
            s.scan_instance_id = c.scan_instance_id AND \
            t.event = c.type"

        qvars = [instanceId]

        if eventType != "ALL":
            qry += " AND c.type = ?"
            qvars.append(eventType)

        if filterFp:
            qry += " AND c.false_positive <> 1"

        qry += " ORDER BY c.data"

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching result events: " + e.args[0])

    # Obtain a unique list of elements
    def scanResultEventUnique(self, instanceId, eventType='ALL', filterFp=False):
        qry = "SELECT DISTINCT data, type, COUNT(*) FROM tbl_scan_results \
            WHERE scan_instance_id = ?"
        qvars = [instanceId]

        if eventType != "ALL":
            qry += " AND type = ?"
            qvars.append(eventType)

        if filterFp:
            qry += " AND false_positive <> 1"

        qry += " GROUP BY type, data ORDER BY COUNT(*)"

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching unique result events: " + e.args[0])

    # Get scan logs
    def scanLogs(self, instanceId, limit=None, fromRowId=None, reverse=False):
        qry = "SELECT generated AS generated, component, \
            type, message, rowid FROM tbl_scan_log WHERE scan_instance_id = ?"
        if fromRowId:
            qry += " and rowid > ?"
        
        qry += " ORDER BY generated "
        if reverse:
            qry += "ASC"
        else:
            qry += "DESC"
        qvars = [instanceId]

        if fromRowId:
            qvars.append(fromRowId)

        if limit is not None:
            qry += " LIMIT ?"
            qvars.append(limit)

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching scan logs: " + e.args[0])

    # Get scan errors
    def scanErrors(self, instanceId, limit=None):
        qry = "SELECT generated AS generated, component, \
            message FROM tbl_scan_log WHERE scan_instance_id = ? \
            AND type = 'ERROR' ORDER BY generated DESC"
        qvars = [instanceId]

        if limit is not None:
            qry += " LIMIT ?"
            qvars.append(limit)

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching scan errors: " + e.args[0])

    # Delete a scan instance
    def scanInstanceDelete(self, instanceId):
        qry1 = "DELETE FROM tbl_scan_instance WHERE guid = ?"
        qry2 = "DELETE FROM tbl_scan_config WHERE scan_instance_id = ?"
        qry3 = "DELETE FROM tbl_scan_results WHERE scan_instance_id = ?"
        qry4 = "DELETE FROM tbl_scan_log WHERE scan_instance_id = ?"
        qvars = [instanceId]
        try:
            self.dbh.execute(qry1, qvars)
            self.dbh.execute(qry2, qvars)
            self.dbh.execute(qry3, qvars)
            self.dbh.execute(qry4, qvars)
            self.conn.commit()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when deleting scan: " + e.args[0])

    # Set the false positive flag for a result
    def scanResultsUpdateFP(self, instanceId, resultHashes, fpFlag):
        for resultHash in resultHashes:
            qry = "UPDATE tbl_scan_results SET false_positive = ? WHERE \
                scan_instance_id = ? AND hash = ?"
            qvars = [fpFlag, instanceId, resultHash]
            try:
                self.dbh.execute(qry, qvars)
            except sqlite3.Error as e:
                self.sf.error("SQL error encountered when updating F/P: " + e.args[0], False)
                return False

        self.conn.commit()
        return True

    # Store the default configuration
    def configSet(self, optMap=dict()):
        qry = "REPLACE INTO tbl_config (scope, opt, val) VALUES (?, ?, ?)"
        for opt in optMap.keys():
            # Module option
            if ":" in opt:
                parts = opt.split(':')
                qvals = [parts[0], parts[1], optMap[opt]]
            else:
                # Global option
                qvals = ["GLOBAL", opt, optMap[opt]]

            try:
                self.dbh.execute(qry, qvals)
            except sqlite3.Error as e:
                self.sf.error("SQL error encountered when storing config, aborting: " + e.args[0])

            self.conn.commit()
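
    # Usage sketch (illustrative only): as the comments above describe, keys
    # containing a colon are stored as module-scoped options ("module:option"),
    # while anything else goes under the GLOBAL scope. The option names below
    # are hypothetical:
    #
    #   dbh.configSet({
    #       '_fetchtimeout': '5',             # global option
    #       'sfp_example:apikey': 'XXXX'      # module-scoped option
    #   })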

    # Retrieve the config from the database
    def configGet(self):
        qry = "SELECT scope, opt, val FROM tbl_config"
        try:
            retval = dict()
            self.dbh.execute(qry)
            for [scope, opt, val] in self.dbh.fetchall():
                if scope == "GLOBAL":
                    retval[opt] = val
                else:
                    retval[scope + ":" + opt] = val

            return retval
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching configuration: " + e.args[0])

    # Reset the config to default (clear it from the DB and let the hard-coded
    # settings in the code take effect.)
    def configClear(self):
        qry = "DELETE from tbl_config"
        try:
            self.dbh.execute(qry)
            self.conn.commit()
        except sqlite3.Error as e:
            self.sf.error("Unable to clear configuration from the database: " + e.args[0])

    # Store a configuration value for a scan
    def scanConfigSet(self, id, optMap=dict()):
        qry = "REPLACE INTO tbl_scan_config \
                (scan_instance_id, component, opt, val) VALUES (?, ?, ?, ?)"

        for opt in optMap.keys():
            # Module option
            if ":" in opt:
                parts = opt.split(':')
                qvals = [id, parts[0], parts[1], optMap[opt]]
            else:
                # Global option
                qvals = [id, "GLOBAL", opt, optMap[opt]]

            try:
                self.dbh.execute(qry, qvals)
            except sqlite3.Error as e:
                self.sf.error("SQL error encountered when storing config, aborting: " + e.args[0])

            self.conn.commit()

    # Retrieve configuration data for a scan component
    def scanConfigGet(self, instanceId):
        qry = "SELECT component, opt, val FROM tbl_scan_config \
                WHERE scan_instance_id = ? ORDER BY component, opt"
        qvars = [instanceId]
        try:
            retval = dict()
            self.dbh.execute(qry, qvars)
            for [component, opt, val] in self.dbh.fetchall():
                if component == "GLOBAL":
                    retval[opt] = val
                else:
                    retval[component + ":" + opt] = val
            return retval
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching configuration: " + e.args[0])

    # Store an event
    # eventData is a SpiderFootEvent object with the following variables:
    # - eventType: the event, e.g. URL_FORM, RAW_DATA, etc.
    # - generated: time the event occurred
    # - confidence: how sure are we of this data's validity, 0-100
    # - visibility: how 'visible' was this data, 0-100
    # - risk: how much risk does this data represent, 0-100
    # - module: module that generated the event
    # - data: the actual data, i.e. a URL, port number, webpage content, etc.
    # - sourceEventHash: hash of the event that triggered this event
    # And getHash() will return the event hash.
    def scanEventStore(self, instanceId, sfEvent, truncateSize=0):
        storeData = ''

        if type(sfEvent.data) is not unicode:
            # If sfEvent.data is a dict or list, convert it to a string first, as
            # those types do not have a unicode converter.
            if type(sfEvent.data) is str:
                storeData = unicode(sfEvent.data, 'utf-8', errors='replace')
            else:
                try:
                    storeData = unicode(str(sfEvent.data), 'utf-8', errors='replace')
                except BaseException as e:
                    self.sf.fatal("Unhandled type detected: " + str(type(sfEvent.data)))
        else:
            storeData = sfEvent.data

        if truncateSize > 0:
            storeData = storeData[0:truncateSize]

        if sfEvent.sourceEventHash in ["", None]:
            self.sf.fatal("UNABLE TO CREATE RECORD WITH EMPTY SOURCE EVENT HASH!")

        qry = "INSERT INTO tbl_scan_results \
            (scan_instance_id, hash, type, generated, confidence, \
            visibility, risk, module, data, source_event_hash) \
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
        qvals = [instanceId, sfEvent.getHash(), sfEvent.eventType, sfEvent.generated,
                 sfEvent.confidence, sfEvent.visibility, sfEvent.risk,
                 sfEvent.module, storeData, sfEvent.sourceEventHash]

        #print "STORING: " + str(qvals)

        try:
            self.dbh.execute(qry, qvals)
            self.conn.commit()
            return None
        except sqlite3.Error as e:
            self.sf.fatal("SQL error encountered when storing event data (" + str(self.dbh) + ": " + e.args[0])

    # List of all previously run scans
    def scanInstanceList(self):
        # SQLite doesn't support FULL OUTER JOINs, so we need a work-around that
        # does a UNION of scans with results and scans without results to
        # get a complete listing.
        qry = "SELECT i.guid, i.name, i.seed_target, ROUND(i.created/1000), \
            ROUND(i.started)/1000 as started, ROUND(i.ended)/1000, i.status, COUNT(r.type) \
            FROM tbl_scan_instance i, tbl_scan_results r WHERE i.guid = r.scan_instance_id \
            AND r.type <> 'ROOT' GROUP BY i.guid \
            UNION ALL \
            SELECT i.guid, i.name, i.seed_target, ROUND(i.created/1000), \
            ROUND(i.started)/1000 as started, ROUND(i.ended)/1000, i.status, '0' \
            FROM tbl_scan_instance i  WHERE i.guid NOT IN ( \
            SELECT distinct scan_instance_id FROM tbl_scan_results WHERE type <> 'ROOT') \
            ORDER BY started DESC"
        try:
            self.dbh.execute(qry)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching scan list: " + e.args[0])

    # History of data from the scan
    def scanResultHistory(self, instanceId):
        qry = "SELECT STRFTIME('%H:%M %w', generated, 'unixepoch') AS hourmin, \
                type, COUNT(*) FROM tbl_scan_results \
                WHERE scan_instance_id = ? GROUP BY hourmin, type"
        qvars = [instanceId]
        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching scan history: " + e.args[0])


    # Get the source IDs, types and data for a set of IDs
    def scanElementSourcesDirect(self, instanceId, elementIdList):
        # the output of this needs to be aligned with scanResultEvent,
        # as other functions call both expecting the same output.
        qry = "SELECT ROUND(c.generated) AS generated, c.data, \
            s.data as 'source_data', \
            c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \
            c.source_event_hash, t.event_descr, t.event_type, s.scan_instance_id, \
            c.false_positive as 'fp', s.false_positive as 'parent_fp' \
            FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \
            WHERE c.scan_instance_id = ? AND c.source_event_hash = s.hash AND \
            s.scan_instance_id = c.scan_instance_id AND \
            t.event = c.type AND c.hash in ("
        qvars = [instanceId]

        for hashId in elementIdList:
            qry = qry + "'" + hashId + "',"
        qry += "'')"

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when getting source element IDs: " + e.args[0])

    # Get the child IDs, types and data for a set of IDs
    def scanElementChildrenDirect(self, instanceId, elementIdList):
        # the output of this needs to be aligned with scanResultEvent,
        # as other functions call both expecting the same output.
        qry = "SELECT ROUND(c.generated) AS generated, c.data, \
            s.data as 'source_data', \
            c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \
            c.source_event_hash, t.event_descr, t.event_type, s.scan_instance_id, \
            c.false_positive as 'fp', s.false_positive as 'parent_fp' \
            FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \
            WHERE c.scan_instance_id = ? AND c.source_event_hash = s.hash AND \
            s.scan_instance_id = c.scan_instance_id AND \
            t.event = c.type AND s.hash in ("
        qvars = [instanceId]

        for hashId in elementIdList:
            qry = qry + "'" + hashId + "',"
        qry += "'')"

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when getting child element IDs: " + e.args[0])

    # Get the full set of upstream IDs which are parents to the 
    # supplied set of IDs.
    # Data has to be in the format of output from scanElementSourcesDirect
    # and produce output in the same format.
    def scanElementSourcesAll(self, instanceId, childData):
        # Get the first round of source IDs for the leafs
        keepGoing = True
        nextIds = list()
        datamap = dict()
        pc = dict()

        for row in childData:
            # these must be unique values!
            parentId = row[9]
            childId = row[8]
            datamap[childId] = row

            if parentId in pc:
                if childId not in pc[parentId]:
                    pc[parentId].append(childId)
            else:
                pc[parentId] = [childId]

            # parents of the leaf set
            if parentId not in nextIds:
                nextIds.append(parentId)

        while keepGoing:
            parentSet = self.scanElementSourcesDirect(instanceId, nextIds)
            nextIds = list()
            keepGoing = False

            for row in parentSet:
                parentId = row[9]
                childId = row[8]
                datamap[childId] = row
                #print childId + " = " + str(row)

                if parentId in pc:
                    if childId not in pc[parentId]:
                        pc[parentId].append(childId)
                else:
                    pc[parentId] = [childId]
                if parentId not in nextIds:
                    nextIds.append(parentId)

                # Prevent us from looping at root
                if parentId != "ROOT":
                    keepGoing = True

        datamap[parentId] = row
        return [datamap, pc]
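
    # Usage sketch (illustrative only): childData must be in the row format
    # produced by scanResultEvent / scanElementSourcesDirect, so a typical
    # call chains the two. The scan GUID is hypothetical.
    #
    #   leafs = dbh.scanResultEvent('ABCDEF1234', eventType='IP_ADDRESS')
    #   datamap, pc = dbh.scanElementSourcesAll('ABCDEF1234', leafs)
    #   # datamap: result hash -> full row; pc: parent hash -> child hashes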

    # Get the full set of downstream IDs which are children of the 
    # supplied set of IDs
    # NOTE FOR NOW THE BEHAVIOR IS NOT THE SAME AS THE scanElementParent*
    # FUNCTIONS - THIS ONLY RETURNS IDS!!
    def scanElementChildrenAll(self, instanceId, parentIds):
        datamap = list()
        keepGoing = True
        nextIds = list()

        nextSet = self.scanElementChildrenDirect(instanceId, parentIds)
        for row in nextSet:
            datamap.append(row[8])

        for row in nextSet:
            if row[8] not in nextIds:
                nextIds.append(row[8])

        while keepGoing:
            nextSet = self.scanElementChildrenDirect(instanceId, nextIds)
            if nextSet is None or len(nextSet) == 0:
                keepGoing = False
                break

            # Reset the worklist once per round (not once per row) so that
            # every child found in this round is followed on the next pass.
            nextIds = list()
            for row in nextSet:
                datamap.append(row[8])
                if row[8] not in nextIds:
                    nextIds.append(row[8])

        return datamap
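
    # Usage sketch (illustrative only): unlike scanElementSourcesAll above,
    # this returns a flat list of result hashes only. The GUID and starting
    # hash are hypothetical.
    #
    #   childIds = dbh.scanElementChildrenAll('ABCDEF1234', [someParentHash])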