def clonescan(self, id):
    """Render the 'new scan' form pre-populated from an existing scan so
    the user can clone and tweak it.

    id: scan instance ID to clone from.
    Returns the rendered template, or an error page on bad input.
    """
    sf = SpiderFoot(self.config)
    dbh = SpiderFootDb(self.config)
    types = dbh.eventTypes()
    info = dbh.scanInstanceGet(id)
    scanconfig = dbh.scanConfigGet(id)
    scanname = info[0]
    scantarget = info[1]
    targetType = None

    if scanname == "" or scantarget == "" or len(scanconfig) == 0:
        return self.error("Something went wrong internally.")

    targetType = sf.targetType(scantarget)
    if targetType is None:
        # Not recognised as IP/netblock/domain/etc., so it must be a name:
        # wrap it in double quotes so the target parser treats it as one.
        # BUG FIX: the original wrote  """ + scantarget + """  which Python
        # parses as a triple-quoted string literal, discarding the target
        # and assigning the literal text ' + scantarget + ' instead.
        scantarget = "\"" + scantarget + "\""

    modlist = scanconfig['_modulesenabled'].split(',')

    templ = Template(filename='dyn/newscan.tmpl', lookup=self.lookup)
    return templ.render(pageid='NEWSCAN', types=types, docroot=self.docroot,
                        modules=self.config['__modules__'], selectedmods=modlist,
                        scanname=unicode(scanname, 'utf-8', errors='replace'),
                        scantarget=unicode(scantarget, 'utf-8', errors='replace'))
def __init__(self, opts):
    """Open (creating if necessary) the SpiderFoot SQLite database and
    ensure the expected schema exists, creating it when absent.

    opts: global configuration dict; '__database' is the DB file path.
    """
    global sf

    # connect() will create the database file if it doesn't exist, but
    # at least we can use this opportunity to ensure we have permissions to
    # read and write to such a file.
    dbh = sqlite3.connect(opts['__database'], timeout=10)
    # NOTE(review): sqlite3.connect() raises sqlite3.Error on failure rather
    # than returning None, so this guard is effectively dead -- kept as a
    # belt-and-braces check. Use identity comparison for None per PEP 8.
    if dbh is None:
        sf.fatal("Could not connect to internal database, and couldn't create " +
                 opts['__database'])
    dbh.text_factory = str
    self.conn = dbh
    self.dbh = dbh.cursor()
    sf = SpiderFoot(opts)

    # Now we actually check to ensure the database file has the schema set
    # up correctly.
    try:
        self.dbh.execute('SELECT COUNT(*) FROM tbl_scan_config')
    except sqlite3.Error:
        # .. If not set up, we set it up.
        try:
            self.create()
        except BaseException as e:
            sf.error("Tried to set up the SpiderFoot database schema, but failed: " +
                     e.args[0])
    return
def savesettingsraw(self, allopts, token):
    """Save settings supplied as a raw JSON blob.

    allopts: JSON-encoded options dict, or the literal string "RESET"
             to restore defaults.
    token:   CSRF-style token which must match self.token.
    Returns a JSON-encoded [status, message] pair.
    """
    if str(token) != str(self.token):
        return json.dumps(["ERROR", "Invalid token (" + str(self.token) + ")."])

    try:
        dbh = SpiderFootDb(self.config)
        # Reset config to default
        if allopts == "RESET":
            dbh.configClear()  # Clear it in the DB
            self.config = deepcopy(self.defaultConfig)  # Clear in memory
        else:
            useropts = json.loads(allopts)
            cleanopts = dict()
            for opt in useropts.keys():
                cleanopts[opt] = self.cleanUserInput([useropts[opt]])[0]

            currentopts = deepcopy(self.config)

            # Make a new config where the user options override
            # the current system config.
            sf = SpiderFoot(self.config)
            self.config = sf.configUnserialize(cleanopts, currentopts)
            # BUG FIX: persist the merged, live configuration (self.config)
            # rather than the pre-merge snapshot (currentopts), matching the
            # newer savesettings() implementation elsewhere in this file.
            dbh.configSet(sf.configSerialize(self.config))
    except Exception as e:
        return json.dumps(["ERROR", "Processing one or more of your inputs failed: " + str(e)])

    return json.dumps(["SUCCESS", ""])
def __init__(self, opts):
    """Open the SpiderFoot SQLite database and verify the schema exists.

    Unlike the self-creating variant, this version only reports an error
    when the schema is missing -- it does not create it.

    opts: global configuration dict; '__database' is the DB file path.
    """
    global sf

    # connect() will create the database file if it doesn't exist, but
    # at least we can use this opportunity to ensure we have permissions to
    # read and write to such a file.
    dbh = sqlite3.connect(opts['__database'], timeout=10)
    # NOTE(review): sqlite3.connect() raises on failure rather than
    # returning None, so this guard is effectively dead -- kept for safety.
    # Identity comparison for None per PEP 8.
    if dbh is None:
        sf.error("Could not connect to internal database. Check that " +
                 opts['__database'] + " exists and is readable and writable.")
    dbh.text_factory = str
    self.conn = dbh
    self.dbh = dbh.cursor()
    sf = SpiderFoot(opts)

    # Now we actually check to ensure the database file has the schema set
    # up correctly.
    try:
        self.dbh.execute('SELECT COUNT(*) FROM tbl_scan_config')
    except sqlite3.Error:
        sf.error("Found spiderfoot.db but it doesn't appear to be in " \
                 "the expected state - ensure the schema is created.")
    return
def savesettings(self, allopts, token):
    """Handle the settings-page save: apply user options (or reset to
    defaults), persist them, and re-render the settings page.

    allopts: JSON-encoded options dict, or "RESET" to restore defaults.
    token:   CSRF-style token which must match self.token.
    """
    if str(token) != str(self.token):
        return self.error("Invalid token (" + str(self.token) + ").")

    try:
        dbh = SpiderFootDb(self.config)
        # Reset config to default
        if allopts == "RESET":
            dbh.configClear()  # Clear it in the DB
            self.config = deepcopy(self.defaultConfig)  # Clear in memory
        else:
            useropts = json.loads(allopts)
            cleanopts = dict()
            for opt in useropts.keys():
                cleanopts[opt] = self.cleanUserInput([useropts[opt]])[0]

            currentopts = deepcopy(self.config)

            # Make a new config where the user options override
            # the current system config.
            sf = SpiderFoot(self.config)
            self.config = sf.configUnserialize(cleanopts, currentopts)
            # BUG FIX: persist the merged, live configuration (self.config)
            # rather than the pre-merge snapshot (currentopts), matching the
            # configFile-aware savesettings() elsewhere in this file.
            dbh.configSet(sf.configSerialize(self.config))
    except Exception as e:
        return self.error("Processing one or more of your inputs failed: " + str(e))

    templ = Template(filename='dyn/opts.tmpl', lookup=self.lookup)
    # Rotate the token after every successful save.
    self.token = random.randint(0, 99999999)
    return templ.render(opts=self.config, pageid='SETTINGS', updated=True,
                        docroot=self.docroot, token=self.token)
def __init__(self, config):
    """Initialise the web UI controller.

    The supplied `config` holds the defaults; any settings previously
    persisted in the database are overlaid on top of them.
    """
    self.defaultConfig = deepcopy(config)
    db = SpiderFootDb(config)
    # Supplement the defaults with whatever was saved earlier.
    self.config = SpiderFoot(config).configUnserialize(db.configGet(), config)
def scanelementtypediscovery(self, id, eventType):
    """Trace events of a given type back to the scan root.

    id:        scan instance ID.
    eventType: event type whose discovery path is wanted.
    Returns JSON with 'tree' (parent/child hierarchy) and 'data'
    (event rows keyed by event ID).
    """
    keepGoing = True
    sf = SpiderFoot(self.config)
    dbh = SpiderFootDb(self.config)
    pc = dict()       # parent event ID -> list of child event IDs
    datamap = dict()  # event ID -> full result row

    # Get the events we will be tracing back from
    leafSet = dbh.scanResultEvent(id, eventType)

    # Get the first round of source IDs for the leafs
    nextIds = list()
    for row in leafSet:
        # these must be unique values!
        parentId = row[9]
        childId = row[8]
        datamap[childId] = row

        # PORTABILITY FIX: dict.has_key() was removed in Python 3;
        # the 'in' operator is equivalent and works on both versions.
        if parentId in pc:
            if childId not in pc[parentId]:
                pc[parentId].append(childId)
        else:
            pc[parentId] = [childId]

        # parents of the leaf set
        if parentId not in nextIds:
            nextIds.append(parentId)

    # Walk upwards one generation at a time until only ROOT remains.
    while keepGoing:
        parentSet = dbh.scanElementSources(id, nextIds)
        nextIds = list()
        keepGoing = False

        for row in parentSet:
            parentId = row[9]
            childId = row[8]
            datamap[childId] = row

            if parentId in pc:
                if childId not in pc[parentId]:
                    pc[parentId].append(childId)
            else:
                pc[parentId] = [childId]

            if parentId not in nextIds:
                nextIds.append(parentId)

            # Prevent us from looping at root
            if parentId != "ROOT":
                keepGoing = True
                datamap[parentId] = row

    retdata = dict()
    retdata['tree'] = sf.dataParentChildToTree(pc)
    retdata['data'] = datamap
    return json.dumps(retdata, ensure_ascii=False)
def startscan(self, scanname, scantarget, modulelist, typelist): modopts = dict() # Not used yet as module options are set globally modlist = list() sf = SpiderFoot(self.config) dbh = SpiderFootDb(self.config) types = dbh.eventTypes() [scanname, scantarget] = self.cleanUserInput([scanname, scantarget]) if scanname == "" or scantarget == "": return self.error("Form incomplete.") if typelist == "" and modulelist == "": return self.error("Form incomplete.") if modulelist != "": modlist = modulelist.replace('module_', '').split(',') else: typesx = typelist.replace('type_', '').split(',') # 1. Find all modules that produce the requested types modlist = sf.modulesProducing(typesx) newmods = deepcopy(modlist) newmodcpy = deepcopy(newmods) # 2. For each type those modules consume, get modules producing while len(newmodcpy) > 0: for etype in sf.eventsToModules(newmodcpy): xmods = sf.modulesProducing([etype]) for mod in xmods: if mod not in modlist: modlist.append(mod) newmods.append(mod) newmodcpy = deepcopy(newmods) newmods = list() # Add our mandatory storage module.. if "sfp__stor_db" not in modlist: modlist.append("sfp__stor_db") modlist.sort() # For now we don't permit multiple simultaneous scans for thread in threading.enumerate(): if thread.name.startswith("SF_"): templ = Template(filename='dyn/newscan.tmpl', lookup=self.lookup) return templ.render(modules=self.config['__modules__'], alreadyRunning=True, runningScan=thread.name[3:], types=types, pageid="NEWSCAN") # Start running a new scan self.scanner = SpiderFootScanner(scanname, scantarget.lower(), modlist, self.config, modopts) t = threading.Thread(name="SF_" + scanname, target=self.scanner.startScan) t.start() templ = Template(filename='dyn/scaninfo.tmpl', lookup=self.lookup) return templ.render(id=self.scanner.myId, name=scanname, status=self.scanner.status, pageid="SCANLIST")
def scanviz(self, id, gexf="0"):
    """Export a single scan's (non-false-positive) events as a graph:
    JSON for the in-browser visualisation when gexf == "0", otherwise
    a downloadable GEXF attachment."""
    types = list()
    dbh = SpiderFootDb(self.config)
    sf = SpiderFoot(self.config)
    data = dbh.scanResultEvent(id, filterFp=True)
    root = dbh.scanInstanceGet(id)[1]

    if gexf == "0":
        return sf.buildGraphJson([root], data)

    cherrypy.response.headers['Content-Disposition'] = "attachment; filename=SpiderFoot.gexf"
    cherrypy.response.headers['Content-Type'] = "application/gexf"
    cherrypy.response.headers['Pragma'] = "no-cache"
    return sf.buildGraphGexf([root], "SpiderFoot Export", data)
def optsexport(self, pattern):
    """Export the current configuration as a plain-text attachment,
    optionally restricted to option names containing `pattern`."""
    conf = SpiderFoot(self.config).configSerialize(self.config)
    lines = []
    for opt in sorted(conf):
        # Skip internal/private options.
        if ":_" in opt or opt.startswith("_"):
            continue
        if not pattern or pattern in opt:
            lines.append(opt + "=" + str(conf[opt]) + "\n")

    cherrypy.response.headers['Content-Disposition'] = 'attachment; filename="SpiderFoot.cfg"'
    cherrypy.response.headers['Content-Type'] = "text/plain"
    return "".join(lines)
def scanelementtypediscovery(self, id, eventType):
    """Trace events of a given type back through their sources.

    id:        scan instance ID.
    eventType: event type whose discovery path is wanted.
    Returns JSON with 'tree' (parent/child hierarchy) and 'data'
    (event rows keyed by event ID).
    """
    sf = SpiderFoot(self.config)
    dbh = SpiderFootDb(self.config)
    pc = dict()
    datamap = dict()

    # Get the events we will be tracing back from
    leafSet = dbh.scanResultEvent(id, eventType)
    [datamap, pc] = dbh.scanElementSourcesAll(id, leafSet)

    # Delete the ROOT key as it adds no value from a viz perspective.
    # ROBUSTNESS FIX: pop() with a default instead of del, so a scan with
    # no matching events (no 'ROOT' entry) doesn't raise KeyError.
    pc.pop('ROOT', None)

    retdata = dict()
    retdata['tree'] = sf.dataParentChildToTree(pc)
    retdata['data'] = datamap
    return json.dumps(retdata, ensure_ascii=False)
def __init__(self, name, target, moduleList, globalOpts, moduleOpts):
    """Record the parameters for a scan.

    moduleOpts is accepted for future use; module options are currently
    set globally.
    """
    self.config = deepcopy(globalOpts)
    self.sf = SpiderFoot(self.config)
    self.name = name
    self.target = target
    self.moduleList = moduleList
def savesettings(self, allopts, token, configFile=None):
    """Handle the settings-page save, optionally importing settings from
    an uploaded SpiderFoot.cfg file.

    allopts:    JSON-encoded options dict, or "RESET" to restore defaults.
    token:      CSRF-style token which must match self.token.
    configFile: optional uploaded file object; when present, its
                key=value lines replace `allopts`.
    """
    if str(token) != str(self.token):
        return self.error("Invalid token (" + str(self.token) + ").")

    if configFile:  # configFile seems to get set even if a file isn't uploaded
        if configFile.file:
            contents = configFile.file.read()
            try:
                tmp = dict()
                for line in contents.split("\n"):
                    if "=" not in line:
                        continue
                    # NOTE(review): split without maxsplit drops anything
                    # after a second '=' in the value -- confirm values
                    # never legitimately contain '='.
                    l = line.strip().split("=")
                    if len(l) == 1:
                        # BUG FIX: the original assigned l[1] on a
                        # one-element list, which raises IndexError;
                        # append the empty value instead. (Unreachable
                        # given the '=' guard above, but now correct.)
                        l.append("")
                    tmp[l[0]] = l[1]
                allopts = json.dumps(tmp)
            except BaseException as e:
                return self.error("Failed to parse input file. Was it generated from SpiderFoot? (" + str(e) + ")")

    try:
        dbh = SpiderFootDb(self.config)
        # Reset config to default
        if allopts == "RESET":
            dbh.configClear()  # Clear it in the DB
            self.config = deepcopy(self.defaultConfig)  # Clear in memory
        else:
            useropts = json.loads(allopts)
            cleanopts = dict()
            for opt in useropts.keys():
                cleanopts[opt] = self.cleanUserInput([useropts[opt]])[0]

            currentopts = deepcopy(self.config)

            # Make a new config where the user options override
            # the current system config.
            sf = SpiderFoot(self.config)
            self.config = sf.configUnserialize(cleanopts, currentopts)
            dbh.configSet(sf.configSerialize(self.config))
    except Exception as e:
        return self.error("Processing one or more of your inputs failed: " + str(e))

    templ = Template(filename='dyn/opts.tmpl', lookup=self.lookup)
    # Rotate the token after every successful save.
    self.token = random.randint(0, 99999999)
    return templ.render(opts=self.config, pageid='SETTINGS', updated=True,
                        docroot=self.docroot, token=self.token)
def scanvizmulti(self, ids, gexf="1"):
    """Export the combined (non-false-positive) events of several scans
    as a GEXF attachment. `ids` is comma-separated scan IDs. JSON output
    for multiple scans is not implemented."""
    types = list()
    dbh = SpiderFootDb(self.config)
    sf = SpiderFoot(self.config)

    data = list()
    roots = list()
    for scan_id in ids.split(','):
        data.extend(dbh.scanResultEvent(scan_id, filterFp=True))
        roots.append(dbh.scanInstanceGet(scan_id)[1])

    if gexf == "0":
        # Not implemented yet
        return None

    cherrypy.response.headers['Content-Disposition'] = "attachment; filename=SpiderFoot.gexf"
    cherrypy.response.headers['Content-Type'] = "application/gexf"
    cherrypy.response.headers['Pragma'] = "no-cache"
    return sf.buildGraphGexf(roots, "SpiderFoot Export", data)
def rerunscan(self, id):
    """Re-run a previously executed scan with its saved target and module
    selection, using a snapshot of the *current* global configuration.

    Python 2 code (print statement, unicode()).
    """
    # Snapshot the current configuration to be used by the scan
    cfg = deepcopy(self.config)
    modopts = dict()  # Not used yet as module options are set globally
    modlist = list()
    sf = SpiderFoot(cfg)
    dbh = SpiderFootDb(cfg)
    info = dbh.scanInstanceGet(id)
    scanconfig = dbh.scanConfigGet(id)
    scanname = info[0]
    scantarget = info[1]
    targetType = None

    if len(scanconfig) == 0:
        return self.error("Something went wrong internally.")

    modlist = scanconfig['_modulesenabled'].split(',')

    targetType = sf.targetType(scantarget)
    if targetType == None:
        # It must then be a name, as a re-run scan should always have a clean
        # target.
        targetType = "HUMAN_NAME"

    # Human names are case-sensitive; everything else is normalised.
    if targetType != "HUMAN_NAME":
        scantarget = scantarget.lower()

    # Start running a new scan
    newId = sf.genScanInstanceGUID(scanname)
    t = SpiderFootScanner(scanname, scantarget, targetType, newId,
                          modlist, cfg, modopts)
    t.start()

    # Wait until the scan has initialized
    while globalScanStatus.getStatus(newId) == None:
        print "[info] Waiting for the scan to initialize..."
        time.sleep(1)

    templ = Template(filename='dyn/scaninfo.tmpl', lookup=self.lookup)
    return templ.render(id=newId,
                        name=unicode(scanname, 'utf-8', errors='replace'),
                        docroot=self.docroot,
                        status=globalScanStatus.getStatus(newId),
                        pageid="SCANLIST")
def rerunscanmulti(self, ids):
    """Re-run several scans (comma-separated IDs), each with its saved
    target and module selection, under a snapshot of the current global
    configuration. Scans are started sequentially.

    Python 2 code (print statement, unicode()).
    """
    # Snapshot the current configuration to be used by the scan
    cfg = deepcopy(self.config)
    modopts = dict()  # Not used yet as module options are set globally
    modlist = list()
    sf = SpiderFoot(cfg)
    dbh = SpiderFootDb(cfg)

    for id in ids.split(","):
        info = dbh.scanInstanceGet(id)
        scanconfig = dbh.scanConfigGet(id)
        scanname = info[0]
        scantarget = info[1]
        targetType = None

        if len(scanconfig) == 0:
            return self.error("Something went wrong internally.")

        modlist = scanconfig['_modulesenabled'].split(',')

        targetType = sf.targetType(scantarget)
        if targetType == None:
            # Should never be triggered for a re-run scan..
            return self.error("Invalid target type. Could not recognize it as " + \
                              "a human name, IP address, IP subnet, ASN, domain name or host name.")

        # Start running a new scan
        newId = sf.genScanInstanceGUID(scanname)
        t = SpiderFootScanner(unicode(scanname, 'utf-8', errors='replace'),
                              unicode(scantarget, 'utf-8', errors='replace').lower(),
                              targetType, newId, modlist, cfg, modopts)
        t.start()

        # Wait until the scan has initialized
        while globalScanStatus.getStatus(newId) == None:
            print "[info] Waiting for the scan to initialize..."
            time.sleep(1)

    templ = Template(filename='dyn/scanlist.tmpl', lookup=self.lookup)
    return templ.render(rerunscans=True, docroot=self.docroot, pageid="SCANLIST")
def __init__(self, config):
    """Initialise the web UI controller and print the URL at which the
    UI can be reached.

    Python 2 code (print statement).
    """
    self.defaultConfig = deepcopy(config)
    dbh = SpiderFootDb(config)
    # 'config' supplied will be the defaults, let's supplement them
    # now with any configuration which may have previously been
    # saved.
    sf = SpiderFoot(config)
    self.config = sf.configUnserialize(dbh.configGet(), config)

    # When bound to all interfaces there is no single concrete URL to
    # print, so show a placeholder instead.
    if self.config['__webaddr'] == "0.0.0.0":
        addr = "<IP of this host>"
    else:
        addr = self.config['__webaddr']

    print ""
    print ""
    print "*************************************************************"
    print " Use SpiderFoot by starting your web browser of choice and "
    print " browse to http://" + addr + ":" + str(self.config['__webport'])
    print "*************************************************************"
    print ""
    print ""
def savesettings(self, allopts):
    """Handle the settings-page save (tokenless, older variant): apply
    user options or reset to defaults, persist them, and re-render the
    settings page.

    allopts: JSON-encoded options dict, or "RESET" to restore defaults.
    """
    try:
        dbh = SpiderFootDb(self.config)
        # Reset config to default
        if allopts == "RESET":
            dbh.configClear()  # Clear it in the DB
            self.config = deepcopy(self.defaultConfig)  # Clear in memory
        else:
            useropts = json.loads(allopts)
            currentopts = deepcopy(self.config)

            # Make a new config where the user options override
            # the current system config.
            sf = SpiderFoot(self.config)
            self.config = sf.configUnserialize(useropts, currentopts)
            # BUG FIX: persist the merged, live configuration (self.config)
            # rather than the pre-merge snapshot (currentopts), matching the
            # newer savesettings() implementation elsewhere in this file.
            dbh.configSet(sf.configSerialize(self.config))
    except Exception as e:
        return self.error("Processing one or more of your inputs failed: " + str(e))

    templ = Template(filename='dyn/opts.tmpl', lookup=self.lookup)
    return templ.render(opts=self.config, pageid='SETTINGS', updated=True)
def __init__(self, name, target, moduleList, globalOpts, moduleOpts):
    """Record the scan parameters and apply any configured DNS server
    override. moduleOpts is accepted for future use."""
    self.config = deepcopy(globalOpts)
    self.sf = SpiderFoot(self.config)
    self.name = name
    self.target = target
    self.moduleList = moduleList

    # Override the default DNS server if one was configured; otherwise
    # undo any override left behind by a previous scan.
    custom_dns = self.config['_dnsserver']
    if custom_dns != "":
        resolver = dns.resolver.Resolver()
        resolver.nameservers = [custom_dns]
        dns.resolver.override_system_resolver(resolver)
    else:
        dns.resolver.restore_system_resolver()
def __init__(self, opts, init=False): self.sf = SpiderFoot(opts) # connect() will create the database file if it doesn't exist, but # at least we can use this opportunity to ensure we have permissions to # read and write to such a file. dbh = sqlite3.connect(self.sf.myPath() + "/" + opts['__database'], timeout=10) if dbh is None: self.sf.fatal("Could not connect to internal database, and couldn't create " + opts['__database']) dbh.text_factory = str self.conn = dbh self.dbh = dbh.cursor() # Now we actually check to ensure the database file has the schema set # up correctly. try: self.dbh.execute('SELECT COUNT(*) FROM tbl_scan_config') self.conn.create_function("REGEXP", 2, __dbregex__) except sqlite3.Error: # .. If not set up, we set it up. try: self.create() init = True except BaseException as e: self.sf.error("Tried to set up the SpiderFoot database schema, but failed: " + e.args[0]) return if init: print "Attempting to verify database and update if necessary..." for qry in self.createTypeQueries: try: self.dbh.execute(qry) self.conn.commit() except BaseException as e: continue self.conn.commit()
def test_handleEvent_domain_whois_event_data_not_containing_webframework_string_should_not_create_event(
        self):
    """Web content without a framework signature must not raise events."""
    sf = SpiderFoot(self.default_options)

    module = sfp_webframework()
    module.setup(sf, dict())
    module.setTarget(SpiderFootTarget('spiderfoot.net', 'INTERNET_NAME'))

    # Any emitted event would trip this replacement listener.
    def new_notifyListeners(self, event):
        raise Exception(f"Raised event {event.eventType}: {event.data}")

    module.notifyListeners = new_notifyListeners.__get__(
        module, sfp_webframework)

    root_evt = SpiderFootEvent('ROOT', 'example data', '', '')
    evt = SpiderFootEvent('TARGET_WEB_CONTENT', 'example data',
                          'example module', root_evt)
    evt.actualSource = "https://spiderfoot.net/"

    self.assertIsNone(module.handleEvent(evt))
def test_handleEvent_event_data_affiliate_internet_name_not_matching_ad_server_should_not_return_event(self):
    """An affiliate host absent from the blocklist must not raise events."""
    sf = SpiderFoot(self.default_options)

    module = sfp_stevenblack_hosts()
    module.setup(sf, dict())
    module.setTarget(SpiderFootTarget('spiderfoot.net', 'INTERNET_NAME'))

    module.opts['_fetchtimeout'] = 15
    module.optdescs['_fetchtimeout'] = ''
    module.opts['_useragent'] = ''
    module.optdescs['_useragent'] = ''

    # Any emitted event would trip this replacement listener.
    def new_notifyListeners(self, event):
        raise Exception(f"Raised event {event.eventType}: {event.data}")

    module.notifyListeners = new_notifyListeners.__get__(module, sfp_stevenblack_hosts)

    root_evt = SpiderFootEvent('ROOT', 'example data', '', '')
    evt = SpiderFootEvent('AFFILIATE_INTERNET_NAME', 'no.ads.safe.local',
                          'example module', root_evt)

    self.assertIsNone(module.handleEvent(evt))
def test_handleEvent_no_api_key_should_set_errorState(self):
    """handleEvent() without an API key should flag errorState."""
    sf = SpiderFoot(self.default_options)

    module = sfp_etherscan()
    module.setup(sf, dict())
    module.setTarget(SpiderFootTarget('example target value', 'IP_ADDRESS'))

    evt = SpiderFootEvent('ROOT', 'example data', '', '')

    self.assertIsNone(module.handleEvent(evt))
    self.assertTrue(module.errorState)
def scanopts(self, id):
    """Return JSON describing a scan's saved configuration, the
    human-readable descriptions of each option, and scan metadata."""
    dbh = SpiderFootDb(self.config)
    ret = dict()
    ret['config'] = dbh.scanConfigGet(id)
    ret['configdesc'] = dict()

    for key in ret['config'].keys():
        if ':' not in key:
            # Global option.
            ret['configdesc'][key] = self.config['__globaloptdescs__'][key]
            continue
        # Module-scoped option ("module:option").
        [modName, modOpt] = key.split(':')
        if modName not in self.config['__modules__'].keys():
            continue
        if modOpt not in self.config['__modules__'][modName]['optdescs'].keys():
            continue
        ret['configdesc'][key] = self.config['__modules__'][modName]['optdescs'][modOpt]

    sf = SpiderFoot(self.config)
    meta = dbh.scanInstanceGet(id)

    started = "Not yet"
    if meta[3] != 0:
        started = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(meta[3]))
    finished = "Not yet"
    if meta[4] != 0:
        finished = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(meta[4]))

    ret['meta'] = [meta[0], meta[1], meta[2], started, finished, meta[5]]
    return json.dumps(ret)
def test_handleEvent(self):
    """handleEvent(self, event) should return None for a ROOT event."""
    sf = SpiderFoot(self.default_options)

    module = sfp_venmo()
    module.setup(sf, dict())
    module.setTarget(SpiderFootTarget('example target value', 'IP_ADDRESS'))

    evt = SpiderFootEvent('ROOT', 'example data', '', '')

    self.assertIsNone(module.handleEvent(evt))
def test_fetchUrl_argument_url_invalid_url_should_return_None(self):
    """fetchUrl() must return None for empty, malformed or
    unsupported-scheme URLs."""
    sf = SpiderFoot(self.default_options)

    for bad_url in ("",
                    "://spiderfoot.net/",
                    "file:///etc/hosts",
                    "irc://spiderfoot.net:6697/"):
        res = sf.fetchUrl(bad_url)
        self.assertEqual(None, res)
def test_valid_email_should_return_a_boolean(self):
    """validEmail(self, email) returns a bool for any input."""
    sf = SpiderFoot(dict())

    # Non-addresses and local-only addresses are rejected.
    for candidate in (None, [], 'root@localhost'):
        verdict = sf.validEmail(candidate)
        self.assertIsInstance(verdict, bool)
        self.assertFalse(verdict)

    verdict = sf.validEmail('*****@*****.**')
    self.assertIsInstance(verdict, bool)
    self.assertTrue(verdict)
def test_is_domain_invalid_domain_should_return_false(self):
    """isDomain(self, hostname, tldList) rejects non-domains."""
    sf = SpiderFoot(self.default_options)
    sf.opts['_internettlds'] = self.test_tlds
    tlds = sf.opts.get('_internettlds')

    for invalid_type in [None, "", list(), dict()]:
        with self.subTest(invalid_type=invalid_type):
            verdict = sf.isDomain(invalid_type, tlds)
            self.assertIsInstance(verdict, bool)
            self.assertFalse(verdict)

    # A bare label and a string with embedded whitespace are not domains.
    for hostname in ("local", "spiderfoot.net\n.com"):
        verdict = sf.isDomain(hostname, tlds)
        self.assertIsInstance(verdict, bool)
        self.assertFalse(verdict)
def test_host_domain_should_return_a_string(self):
    """hostDomain(self, hostname, tldList) strips sub-domains down to the
    registrable domain."""
    sf = SpiderFoot(self.default_options)
    sf.opts['_internettlds'] = self.test_tlds
    tlds = sf.opts.get('_internettlds')

    for hostname in ('www.spiderfoot.net',
                     'spiderfoot.net',
                     'abc.www.spiderfoot.net'):
        domain = sf.hostDomain(hostname, tlds)
        self.assertIsInstance(domain, str)
        self.assertEqual('spiderfoot.net', domain)
def test_validPhoneNumber_should_return_a_boolean(self):
    """validPhoneNumber(self, phone) returns a bool for any input."""
    sf = SpiderFoot(dict())

    for invalid_type in [None, "", list(), dict(), int()]:
        with self.subTest(invalid_type=invalid_type):
            verdict = sf.validPhoneNumber(invalid_type)
            self.assertIsInstance(verdict, bool)
            self.assertFalse(verdict)

    # Too short and too long are both rejected.
    for phone in ('+1234567890', '+12345678901234567890'):
        verdict = sf.validPhoneNumber(phone)
        self.assertIsInstance(verdict, bool)
        self.assertFalse(verdict)

    verdict = sf.validPhoneNumber('+12345678901')
    self.assertIsInstance(verdict, bool)
    self.assertTrue(verdict)
def test_parse_iban_numbers_should_return_a_list(self):
    """Test parseIBANNumbers(self, data).

    Checks that the parser always returns a list, accepts one valid
    sample IBAN per country, and rejects structurally invalid ones.
    """
    sf = SpiderFoot(self.default_options)

    # Non-string / empty inputs must still yield a list (empty).
    invalid_types = [None, "", list(), dict()]
    for invalid_type in invalid_types:
        with self.subTest(invalid_type=invalid_type):
            ibans = sf.parseIBANNumbers(invalid_type)
            self.assertIsInstance(ibans, list)

    # Example IBANS from https://www.iban.com/structure
    ibans = [
        "AL35202111090000000001234567", "AD1400080001001234567890",
        "AT483200000012345864", "AZ96AZEJ00000000001234567890",
        "BH02CITI00001077181611", "BY86AKBB10100000002966000000",
        "BE71096123456769", "BA393385804800211234",
        "BR1500000000000010932840814P2", "BG18RZBB91550123456789",
        "CR23015108410026012345", "HR1723600001101234565",
        "CY21002001950000357001234567", "CZ5508000000001234567899",
        "DK9520000123456789", "DO22ACAU00000000000123456789",
        "EG800002000156789012345180002", "SV43ACAT00000000000000123123",
        "EE471000001020145685", "FO9264600123456789",
        "FI1410093000123458", "FR7630006000011234567890189",
        "GE60NB0000000123456789", "DE75512108001245126199",
        "GI04BARC000001234567890", "GR9608100010000001234567890",
        "GL8964710123456789", "GT20AGRO00000000001234567890",
        "VA59001123000012345678", "HU93116000060000000012345676",
        "IS750001121234563108962099", "IQ20CBIQ861800101010500",
        "IE64IRCE92050112345678", "IL170108000000012612345",
        "IT60X0542811101000000123456", "JO71CBJO0000000000001234567890",
        "KZ563190000012344567", "XK051212012345678906",
        "KW81CBKU0000000000001234560101", "LV97HABA0012345678910",
        "LB92000700000000123123456123", "LI7408806123456789012",
        "LT601010012345678901", "LU120010001234567891",
        "MT31MALT01100000000000000000123", "MR1300020001010000123456753",
        "MU43BOMM0101123456789101000MUR", "MD21EX000000000001234567",
        "MC5810096180790123456789085", "ME25505000012345678951",
        "NL02ABNA0123456789", "MK07200002785123453",
        "NO8330001234567", "PK36SCBL0000001123456702",
        "PS92PALS000000000400123456702", "PL10105000997603123456789123",
        "PT50002700000001234567833", "QA54QNBA000000000000693123456",
        "RO09BCYP0000001234567890", "LC14BOSL123456789012345678901234",
        "SM76P0854009812123456789123", "ST23000200000289355710148",
        "SA4420000001234567891234", "RS35105008123123123173",
        "SC52BAHL01031234567890123456USD", "SK8975000000000012345671",
        "SI56192001234567892", "ES7921000813610123456789",
        "SE7280000810340009783242", "CH5604835012345678009",
        "TL380010012345678910106", "TN5904018104004942712345",
        "TR320010009999901234567890", "UA903052992990004149123456789",
        "AE460090000000123456789", "GB33BUKB20201555555555",
        "VG21PACG0000000123456789"
    ]
    for iban in ibans:
        with self.subTest(iban=iban):
            parse_ibans = sf.parseIBANNumbers(iban)
            self.assertIsInstance(parse_ibans, list)
            self.assertIn(iban, parse_ibans)

    # Invalid IBANs
    ibans = [
        # Invalid country code
        "ZZ21PACG0000000123456789",
        # Invalid length for country code
        "VG123456789012345",
        # Invalid mod 97 remainder
        "VG21PACG0000000123456111"
    ]
    for iban in ibans:
        with self.subTest(iban=iban):
            parse_ibans = sf.parseIBANNumbers(iban)
            self.assertIsInstance(parse_ibans, list)
            self.assertNotIn(iban, parse_ibans)
def test_setup(self):
    """sfp_talosintel.setup(sf, userOpts) should complete without error."""
    scanner = SpiderFoot(self.default_options)
    module = sfp_talosintel()
    module.setup(scanner, dict())
sfConfig['_debug'] = True else: sfConfig['_debug'] = False if args.q or args.o == "json": sfConfig['__logging'] = False if args.l: (addr, port) = args.l.split(":") sfConfig['__webaddr'] = addr sfConfig['__webport'] = int(port) else: sfConfig['__logstdout'] = True sfModules = dict() sft = SpiderFoot(sfConfig) # Go through each module in the modules directory with a .py extension for filename in os.listdir(sft.myPath() + '/modules/'): if filename.startswith("sfp_") and filename.endswith(".py"): # Skip the module template and debugging modules if filename == "sfp_template.py" or filename == 'sfp_stor_print.py': continue modName = filename.split('.')[0] # Load and instantiate the module sfModules[modName] = dict() mod = __import__('modules.' + modName, globals(), locals(), [modName]) sfModules[modName]['object'] = getattr(mod, modName)() sfModules[modName]['name'] = sfModules[modName][ 'object'].__doc__.split(":", 5)[0]
def test_setup(self):
    """sfp_openphish.setup(sf, userOpts) should complete without error."""
    scanner = SpiderFoot(self.default_options)
    module = sfp_openphish()
    module.setup(scanner, dict())
def test_setup(self):
    """sfp_dnsresolve.setup(sf, userOpts) should complete without error."""
    scanner = SpiderFoot(self.default_options)
    module = sfp_dnsresolve()
    module.setup(scanner, dict())
def test_setup(self):
    """sfp_etherscan.setup(sf, userOpts) should complete without error."""
    scanner = SpiderFoot(self.default_options)
    module = sfp_etherscan()
    module.setup(scanner, dict())
def index(self):
    """Render stage 1 of the initial setup page."""
    sf = SpiderFoot(self.config)
    # Look for referenced templates in the current directory only
    setup_tmpl = Template(filename='dyn/setup.tmpl', lookup=self.lookup)
    return setup_tmpl.render(stage=1, config=self.config,
                             path=os.path.dirname(sf.myPath()))
def startscan(self, scanname, scantarget, modulelist, typelist): global globalScanStatus # Snapshot the current configuration to be used by the scan cfg = deepcopy(self.config) modopts = dict() # Not used yet as module options are set globally modlist = list() sf = SpiderFoot(cfg) dbh = SpiderFootDb(cfg) types = dbh.eventTypes() targetType = None [scanname, scantarget] = self.cleanUserInput([scanname, scantarget]) if scanname == "" or scantarget == "": return self.error("Form incomplete.") if typelist == "" and modulelist == "": return self.error("Form incomplete.") if modulelist != "": modlist = modulelist.replace('module_', '').split(',') else: typesx = typelist.replace('type_', '').split(',') # 1. Find all modules that produce the requested types modlist = sf.modulesProducing(typesx) newmods = deepcopy(modlist) newmodcpy = deepcopy(newmods) # 2. For each type those modules consume, get modules producing while len(newmodcpy) > 0: for etype in sf.eventsToModules(newmodcpy): xmods = sf.modulesProducing([etype]) for mod in xmods: if mod not in modlist: modlist.append(mod) newmods.append(mod) newmodcpy = deepcopy(newmods) newmods = list() # Add our mandatory storage module.. if "sfp__stor_db" not in modlist: modlist.append("sfp__stor_db") modlist.sort() targetType = sf.targetType(scantarget) if targetType is None: return self.error("Invalid target type. Could not recognize it as " + \ "an IP address, IP subnet, domain name or host name.") # Start running a new scan scanId = sf.genScanInstanceGUID(scanname) t = SpiderFootScanner(scanname, scantarget.lower(), targetType, scanId, modlist, cfg, modopts) t.start() # Wait until the scan has initialized while globalScanStatus.getStatus(scanId) is None: print "[info] Waiting for the scan to initialize..." time.sleep(1) templ = Template(filename='dyn/scaninfo.tmpl', lookup=self.lookup) return templ.render(id=scanId, name=scanname, docroot=self.docroot, status=globalScanStatus.getStatus(scanId), pageid="SCANLIST")
class SpiderFootScanner:
    """Drives a single scan: loads the requested modules, wires them up as
    listeners of each other, runs them sequentially and records scan status
    in the back-end database.

    NOTE(review): this __init__ takes 5 arguments, but startscan() elsewhere
    in this file constructs SpiderFootScanner with 7 — the two appear to be
    different revisions of the class; confirm which one is live.
    """
    moduleInstances = None   # dict of modName -> module instance while a scan runs
    status = "UNKNOWN"       # lifecycle: STARTING/RUNNING/ABORTING/ABORTED/FINISHED/ERROR-FAILED
    myId = None              # GUID of the scan owned by this instance

    def __init__(self, name, target, moduleList, globalOpts, moduleOpts):
        # moduleOpts is accepted but not stored; module options come from
        # the global config (see startScan()).
        self.config = deepcopy(globalOpts)
        self.sf = SpiderFoot(self.config)
        self.target = target
        self.moduleList = moduleList
        self.name = name
        return

    # Status of the currently running scan (if any)
    def scanStatus(self, id):
        if id != self.myId:
            return "UNKNOWN"
        return self.status

    # Stop a scan (id variable is unnecessary for now given that only one
    # simultaneous scan is permitted.)
    def stopScan(self, id):
        if id != self.myId:
            return None
        if self.moduleInstances == None:
            return None
        # Ask every loaded module to stop; modules poll via checkForStop().
        for modName in self.moduleInstances.keys():
            self.moduleInstances[modName].stopScanning()

    # Start running a scan
    def startScan(self):
        self.moduleInstances = dict()
        dbh = SpiderFootDb(self.config)
        self.sf.setDbh(dbh)
        aborted = False

        # Create a unique ID for this scan and create it in the back-end DB.
        self.config['__guid__'] = dbh.scanInstanceGenGUID(self.target)
        self.sf.setScanId(self.config['__guid__'])
        self.myId = self.config['__guid__']
        dbh.scanInstanceCreate(self.config['__guid__'], self.name, self.target)
        dbh.scanInstanceSet(self.config['__guid__'], time.time() * 1000, None, 'STARTING')
        self.status = "STARTING"

        # Save the config current set for this scan
        self.config['_modulesenabled'] = self.moduleList
        dbh.scanConfigSet(self.config['__guid__'], self.sf.configSerialize(self.config))

        self.sf.status("Scan [" + self.config['__guid__'] + "] initiated.")
        # moduleList = list of modules the user wants to run
        try:
            for modName in self.moduleList:
                if modName == '':
                    continue
                # Dynamically import and instantiate each module by name.
                module = __import__('modules.' + modName, globals(), locals(), [modName])
                mod = getattr(module, modName)()
                mod.__name__ = modName

                # A bit hacky: we pass the database object as part of the config. This
                # object should only be used by the internal SpiderFoot modules writing
                # to the database, which at present is only sfp_stor_db.
                # Individual modules cannot create their own SpiderFootDb instance or
                # we'll get database locking issues, so it all goes through this.
                self.config['__sfdb__'] = dbh

                # Set up the module
                # Configuration is a combined global config with module-specific options
                #modConfig = deepcopy(self.config)
                # NOTE(review): this copies global options INTO the module's own
                # opts dict (mutating self.config['__modules__']) rather than
                # working on a copy — confirm that is intended.
                modConfig = self.config['__modules__'][modName]['opts']
                for opt in self.config.keys():
                    modConfig[opt] = self.config[opt]

                mod.clearListeners()  # clear any listener relationships from the past
                mod.setup(self.sf, self.target, modConfig)
                self.moduleInstances[modName] = mod
                self.sf.status(modName + " module loaded.")

            # Register listener modules and then start all modules sequentially
            for module in self.moduleInstances.values():
                for listenerModule in self.moduleInstances.values():
                    # Careful not to register twice or you will get duplicate events
                    if listenerModule in module._listenerModules:
                        continue
                    # Note the absence of a check for whether a module can register
                    # to itself. That is intentional because some modules will
                    # act on their own notifications (e.g. sfp_dns)!
                    if listenerModule.watchedEvents() != None:
                        module.registerListener(listenerModule)

            dbh.scanInstanceSet(self.config['__guid__'], status='RUNNING')
            self.status = "RUNNING"

            # Create the "ROOT" event which un-triggered modules will link events to
            rootEvent = SpiderFootEvent("INITIAL_TARGET", self.target, "SpiderFoot UI")
            dbh.scanEventStore(self.config['__guid__'], rootEvent)

            # Start the modules sequentially.
            for module in self.moduleInstances.values():
                # Check in case the user requested to stop the scan between modules initializing
                if module.checkForStop():
                    dbh.scanInstanceSet(self.config['__guid__'], status='ABORTING')
                    self.status = "ABORTING"
                    aborted = True
                    break
                # Many modules' start() method will return None, as most will rely on
                # notifications during the scan from other modules.
                module.start()

            # Check if any of the modules ended due to being stopped
            for module in self.moduleInstances.values():
                if module.checkForStop():
                    aborted = True

            if aborted:
                self.sf.status("Scan [" + self.config['__guid__'] + "] aborted.")
                dbh.scanInstanceSet(self.config['__guid__'], None, time.time() * 1000, 'ABORTED')
                self.status = "ABORTED"
            else:
                self.sf.status("Scan [" + self.config['__guid__'] + "] completed.")
                dbh.scanInstanceSet(self.config['__guid__'], None, time.time() * 1000, 'FINISHED')
                self.status = "FINISHED"
        except Exception as e:
            # Any unhandled module/setup failure marks the whole scan as failed.
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self.sf.error("Unhandled exception encountered during scan. " + \
                          "Please report this as a bug: " + \
                          repr(traceback.format_exception(exc_type, exc_value, exc_traceback)), False)
            self.sf.status("Scan [" + self.config['__guid__'] + "] failed: " + str(e))
            dbh.scanInstanceSet(self.config['__guid__'], None, time.time() * 1000, 'ERROR-FAILED')
            self.status = "ERROR-FAILED"

        # Tear down regardless of outcome so a new scan can start cleanly.
        self.moduleInstances = None
        dbh.close()
        self.sf.setDbh(None)
        self.sf.setScanId(None)
class SpiderFootDb: sf = None dbh = None conn = None # Queries for creating the SpiderFoot database createQueries = [ "PRAGMA journal_mode=WAL", "CREATE TABLE tbl_event_types ( \ event VARCHAR NOT NULL PRIMARY KEY, \ event_descr VARCHAR NOT NULL, \ event_raw INT NOT NULL DEFAULT 0 \ )", "CREATE TABLE tbl_config ( \ scope VARCHAR NOT NULL, \ opt VARCHAR NOT NULL, \ val VARCHAR NOT NULL, \ PRIMARY KEY (scope, opt) \ )", "CREATE TABLE tbl_scan_instance ( \ guid VARCHAR NOT NULL PRIMARY KEY, \ name VARCHAR NOT NULL, \ seed_target VARCHAR NOT NULL, \ created INT DEFAULT 0, \ started INT DEFAULT 0, \ ended INT DEFAULT 0, \ status VARCHAR NOT NULL \ )", "CREATE TABLE tbl_scan_log ( \ scan_instance_id VARCHAR NOT NULL REFERENCES tbl_scan_instance(guid), \ generated INT NOT NULL, \ component VARCHAR, \ type VARCHAR NOT NULL, \ message VARCHAR \ )", "CREATE TABLE tbl_scan_config ( \ scan_instance_id VARCHAR NOT NULL REFERENCES tbl_scan_instance(guid), \ component VARCHAR NOT NULL, \ opt VARCHAR NOT NULL, \ val VARCHAR NOT NULL \ )", "CREATE TABLE tbl_scan_results ( \ scan_instance_id VARCHAR NOT NULL REFERENCES tbl_scan_instance(guid), \ hash VARCHAR NOT NULL, \ type VARCHAR NOT NULL REFERENCES tbl_event_types(event), \ generated INT NOT NULL, \ confidence INT NOT NULL DEFAULT 100, \ visibility INT NOT NULL DEFAULT 100, \ risk INT NOT NULL DEFAULT 0, \ module VARCHAR NOT NULL, \ data VARCHAR, \ source_event_hash VARCHAR DEFAULT 'ROOT' \ )", "CREATE INDEX idx_scan_results_id ON tbl_scan_results (scan_instance_id)", "CREATE INDEX idx_scan_results_type ON tbl_scan_results (scan_instance_id, type)", "CREATE INDEX idx_scan_results_hash ON tbl_scan_results (scan_instance_id, hash)", "CREATE INDEX idx_scan_results_srchash ON tbl_scan_results (scan_instance_id, source_event_hash)", "CREATE INDEX idx_scan_logs ON tbl_scan_log (scan_instance_id)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('AFFILIATE_INTERNET_NAME', 'Affiliate - Internet Name', 0)", "INSERT 
INTO tbl_event_types (event, event_descr, event_raw) VALUES ('AFFILIATE_IPADDR', 'Affiliate - IP Address', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('AFFILIATE_IP_SUBNET', 'Affiliate - IP Address - Subnet', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('AFFILIATE_WEB_CONTENT', 'Affiliate - Web Content', 1)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('BGP_AS_OWNER', 'BGP AS Ownership', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('BGP_AS_MEMBER', 'BGP AS Membership', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('BGP_AS_PEER', 'BGP AS Peer', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('BLACKLISTED_IPADDR', 'Blacklisted IP Address', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('BLACKLISTED_AFFILIATE_IPADDR', 'Blacklisted Affiliate IP Address', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('BLACKLISTED_SUBNET', 'Blacklisted IP on Same Subnet', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('BLACKLISTED_NETBLOCK', 'Blacklisted IP on Owned Netblock', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('CO_HOSTED_SITE', 'Co-Hosted Site', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('DEFACED_INTERNET_NAME', 'Defaced', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('DEFACED_IPADDR', 'Defaced IP Address', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('DEFACED_AFFILIATE_INTERNET_NAME', 'Defaced Affiliate', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('DEFACED_AFFILIATE_IPADDR', 'Defaced Affiliate IP Address', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('DEFACED_COHOST', 'Defaced Co-Hosted Site', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) 
VALUES ('DEVICE_TYPE', 'Device Type', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('DNS_TEXT', 'DNS TXT Record', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('DOMAIN_NAME', 'Domain Name', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('EMAILADDR', 'Email Address', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('GEOINFO', 'Physical Location', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('HTTP_CODE', 'HTTP Status Code', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('HUMAN_NAME', 'Human Name', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('INTERESTING_FILE', 'Interesting File', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('JUNK_FILE', 'Junk File', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('INTERNET_NAME', 'Internet Name', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('IP_ADDRESS', 'IP Address', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('IPV6_ADDRESS', 'IPv6 Address', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('NETBLOCK_OWNER', 'Netblock Ownership', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('NETBLOCK_MEMBER', 'Netblock Membership', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('MALICIOUS_ASN', 'Malicious AS', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('MALICIOUS_IPADDR', 'Malicious IP Address', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('MALICIOUS_COHOST', 'Malicious Co-Hosted Site', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('MALICIOUS_INTERNET_NAME', 'Malicious Internet Name', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES 
('MALICIOUS_AFFILIATE_INTERNET_NAME', 'Malicious Affiliate', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('MALICIOUS_AFFILIATE_IPADDR', 'Malicious Affiliate IP Address', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('MALICIOUS_NETBLOCK', 'Owned Netblock with Malicious IP', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('MALICIOUS_SUBNET', 'Malicious IP on Same Subnet', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('LINKED_URL_INTERNAL', 'Linked URL - Internal', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('LINKED_URL_EXTERNAL', 'Linked URL - External', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('OPERATING_SYSTEM', 'Operating System', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('PASTEBIN_CONTENT', 'PasteBin Content', 1)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('PROVIDER_DNS', 'Name Server (DNS ''NS'' Records)', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('PROVIDER_MAIL', 'Email Gateway (DNS ''MX'' Records)', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('PROVIDER_JAVASCRIPT', 'Externally Hosted Javascript', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('RAW_RIR_DATA', 'Raw Data from RIRs', 1)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('RAW_DNS_RECORDS', 'Raw DNS Records', 1)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('RAW_FILE_META_DATA', 'Raw File Meta Data', 1)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('SEARCH_ENGINE_WEB_CONTENT', 'Search Engine''s Web Content', 1)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('SOCIAL_MEDIA', 'Social Media Presence', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('SIMILARDOMAIN', 
'Similar Domain', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('SSL_CERTIFICATE_ISSUED', 'SSL Certificate - Issued to', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('SSL_CERTIFICATE_ISSUER', 'SSL Certificate - Issued by', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('SSL_CERTIFICATE_MISMATCH', 'SSL Certificate Host Mismatch', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('SSL_CERTIFICATE_EXPIRED', 'SSL Certificate Expired', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('SSL_CERTIFICATE_EXPIRING', 'SSL Certificate Expiring', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('SSL_CERTIFICATE_RAW', 'SSL Certificate - Raw Data', 1)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('TARGET_WEB_CONTENT', 'Web Content', 1)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('TARGET_WEB_COOKIE', 'Cookies', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('TCP_PORT_OPEN', 'Open TCP Port', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('TCP_PORT_OPEN_BANNER', 'Open TCP Port Banner', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('URL_FORM', 'URL (Form)', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('URL_FLASH', 'URL (Uses Flash)', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('URL_JAVASCRIPT', 'URL (Uses Javascript)', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('URL_WEB_FRAMEWORK', 'URL (Uses a Web Framework)', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('URL_JAVA_APPLET', 'URL (Uses Java applet)', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('URL_STATIC', 'URL (Purely Static)', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES 
('URL_PASSWORD', 'URL (Accepts Passwords)', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('URL_UPLOAD', 'URL (Accepts Uploads)', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('WEBSERVER_BANNER', 'Web Server', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('WEBSERVER_HTTPHEADERS', 'HTTP Headers', 1)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('WEBSERVER_STRANGEHEADER', 'Non-Standard HTTP Header', 0)", "INSERT INTO tbl_event_types (event, event_descr, event_raw) VALUES ('WEBSERVER_TECHNOLOGY', 'Web Technology', 0)" ] def __init__(self, opts): self.sf = SpiderFoot(opts) # connect() will create the database file if it doesn't exist, but # at least we can use this opportunity to ensure we have permissions to # read and write to such a file. dbh = sqlite3.connect(self.sf.myPath() + "/" + opts['__database'], timeout=10) if dbh == None: self.sf.fatal("Could not connect to internal database, and couldn't create " + \ opts['__database']) dbh.text_factory = str self.conn = dbh self.dbh = dbh.cursor() # Now we actually check to ensure the database file has the schema set # up correctly. try: self.dbh.execute('SELECT COUNT(*) FROM tbl_scan_config') self.conn.create_function("REGEXP", 2, __dbregex__) except sqlite3.Error: # .. If not set up, we set it up. 
try: self.create() except BaseException as e: self.sf.error("Tried to set up the SpiderFoot database schema, but failed: " + \ e.args[0]) return # # Back-end database operations # # Create the back-end schema def create(self): try: for qry in self.createQueries: self.dbh.execute(qry) self.conn.commit() except sqlite3.Error as e: raise BaseException("SQL error encountered when setting up database: " + e.args[0]) # Close the database handle def close(self): self.dbh.close() # Search results # criteria is search criteria such as: # - scan_id (search within a scan, if omitted search all) # - type (search a specific type, if omitted search all) # - value (search values for a specific string, if omitted search all) # - regex (search values for a regular expression) # ** at least two criteria must be set ** def search(self, criteria): if criteria.values().count(None) == 3: return False qvars = list() qry = "SELECT ROUND(c.generated) AS generated, c.data, \ s.data as 'source_data', \ c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \ c.source_event_hash, t.event_descr, c.scan_instance_id \ FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \ WHERE s.scan_instance_id = c.scan_instance_id AND \ t.event = c.type AND c.source_event_hash = s.hash " if criteria.get('scan_id') != None: qry += "AND c.scan_instance_id = ? " qvars.append(criteria['scan_id']) if criteria.get('type') != None: qry += " AND c.type = ? " qvars.append(criteria['type']) if criteria.get('value') != None: qry += " AND c.data LIKE ? " qvars.append(criteria['value']) if criteria.get('regex') != None: qry += " AND c.data REGEXP ? 
" qvars.append(criteria['regex']) qry = qry + " ORDER BY c.data" try: self.dbh.execute(qry, qvars) return self.dbh.fetchall() except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching search results: " + e.args[0]) # Get event types def eventTypes(self): qry = "SELECT event_descr, event, event_raw FROM tbl_event_types" try: self.dbh.execute(qry) return self.dbh.fetchall() except sqlite3.Error as e: self.sf.error("SQL error encountered when retreiving event types:" + e.args[0]) # Log an event to the database def scanLogEvent(self, instanceId, classification, message, component=None): if component == None: component = "SpiderFoot" qry = "INSERT INTO tbl_scan_log \ (scan_instance_id, generated, component, type, message) \ VALUES (?, ?, ?, ?, ?)" try: self.dbh.execute(qry, ( instanceId, time.time() * 1000, component, classification, message )) self.conn.commit() except sqlite3.Error as e: if "locked" in e.args[0]: # TODO: Do something smarter here to handle locked databases self.sf.fatal("Unable to log event in DB: " + e.args[0]) else: self.sf.fatal("Unable to log event in DB: " + e.args[0]) return True # Store a scan instance def scanInstanceCreate(self, instanceId, scanName, scanTarget): qry = "INSERT INTO tbl_scan_instance \ (guid, name, seed_target, created, status) \ VALUES (?, ?, ?, ?, ?)" try: self.dbh.execute(qry, ( instanceId, scanName, scanTarget, time.time() * 1000, 'CREATED' )) self.conn.commit() except sqlite3.Error as e: self.sf.fatal("Unable to create instance in DB: " + e.args[0]) return True # Update the start time, end time or status (or all 3) of a scan instance def scanInstanceSet(self, instanceId, started=None, ended=None, status=None): qvars = list() qry = "UPDATE tbl_scan_instance SET " if started != None: qry += " started = ?," qvars.append(started) if ended != None: qry += " ended = ?," qvars.append(ended) if status != None: qry += " status = ?," qvars.append(status) # guid = guid is a little hack to avoid messing with , 
placement above qry += " guid = guid WHERE guid = ?" qvars.append(instanceId) try: self.dbh.execute(qry, qvars) self.conn.commit() except sqlite3.Error: self.sf.fatal("Unable to set information for the scan instance.") # Return info about a scan instance (name, target, created, started, # ended, status) - don't need this yet - untested def scanInstanceGet(self, instanceId): qry = "SELECT name, seed_target, ROUND(created/1000) AS created, \ ROUND(started/1000) AS started, ROUND(ended/1000) AS ended, status \ FROM tbl_scan_instance WHERE guid = ?" qvars = [instanceId] try: self.dbh.execute(qry, qvars) return self.dbh.fetchone() except sqlite3.Error as e: self.sf.error("SQL error encountered when retreiving scan instance:" + e.args[0]) # Obtain a summary of the results per event type def scanResultSummary(self, instanceId): qry = "SELECT r.type, e.event_descr, MAX(ROUND(generated)) AS last_in, \ count(*) AS total, count(DISTINCT r.data) as utotal FROM \ tbl_scan_results r, tbl_event_types e WHERE e.event = r.type \ AND r.scan_instance_id = ? GROUP BY r.type ORDER BY e.event_descr" qvars = [instanceId] try: self.dbh.execute(qry, qvars) return self.dbh.fetchall() except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching result summary: " + e.args[0]) # Obtain the ROOT event for a scan: Must be same output as scanResultEvent! def scanRootEvent(self, instanceId): qry = "SELECT ROUND(c.generated) AS generated, c.data, \ s.data as 'source_data', \ c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \ c.source_event_hash, t.event_descr \ FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \ WHERE c.scan_instance_id = ? 
AND c.source_event_hash = s.hash AND \ s.scan_instance_id = c.scan_instance_id AND \ t.event = c.type AND c.source_event_hash = 'ROOT'" qvars = [instanceId] try: self.dbh.execute(qry, qvars) return self.dbh.fetchone() except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching ROOT event: " + e.args[0]) # Obtain the data for a scan and event type def scanResultEvent(self, instanceId, eventType='ALL'): qry = "SELECT ROUND(c.generated) AS generated, c.data, \ s.data as 'source_data', \ c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \ c.source_event_hash, t.event_descr \ FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \ WHERE c.scan_instance_id = ? AND c.source_event_hash = s.hash AND \ s.scan_instance_id = c.scan_instance_id AND \ t.event = c.type" qvars = [instanceId] if eventType != "ALL": qry = qry + " AND c.type = ?" qvars.append(eventType) qry = qry + " ORDER BY c.data" #print "QRY: " + qry try: self.dbh.execute(qry, qvars) return self.dbh.fetchall() except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching result events: " + e.args[0]) # Obtain a unique list of elements def scanResultEventUnique(self, instanceId, eventType='ALL'): qry = "SELECT DISTINCT data, type, COUNT(*) FROM tbl_scan_results \ WHERE scan_instance_id = ?" qvars = [instanceId] if eventType != "ALL": qry = qry + " AND type = ?" qvars.append(eventType) qry = qry + " GROUP BY type, data ORDER BY COUNT(*)" try: self.dbh.execute(qry, qvars) return self.dbh.fetchall() except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching unique result events: " + e.args[0]) # Get scan logs def scanLogs(self, instanceId, limit=None): qry = "SELECT generated AS generated, component, \ type, message FROM tbl_scan_log WHERE scan_instance_id = ? \ ORDER BY generated DESC" qvars = [instanceId] if limit != None: qry = qry + " LIMIT ?" 
qvars.append(limit) try: self.dbh.execute(qry, qvars) return self.dbh.fetchall() except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching scan logs: " + e.args[0]) # Get scan errors def scanErrors(self, instanceId, limit=None): qry = "SELECT generated AS generated, component, \ message FROM tbl_scan_log WHERE scan_instance_id = ? \ AND type = 'ERROR' ORDER BY generated DESC" qvars = [instanceId] if limit != None: qry = qry + " LIMIT ?" qvars.append(limit) try: self.dbh.execute(qry, qvars) return self.dbh.fetchall() except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching scan errors: " + e.args[0]) # Delete a scan instance def scanInstanceDelete(self, instanceId): qry1 = "DELETE FROM tbl_scan_instance WHERE guid = ?" qry2 = "DELETE FROM tbl_scan_config WHERE scan_instance_id = ?" qry3 = "DELETE FROM tbl_scan_results WHERE scan_instance_id = ?" qry4 = "DELETE FROM tbl_scan_log WHERE scan_instance_id = ?" qvars = [instanceId] try: self.dbh.execute(qry1, qvars) self.dbh.execute(qry2, qvars) self.dbh.execute(qry3, qvars) self.dbh.execute(qry4, qvars) self.conn.commit() except sqlite3.Error as e: self.sf.error("SQL error encountered when deleting scan: " + e.args[0]) # Store the default configuration def configSet(self, optMap=dict()): qry = "REPLACE INTO tbl_config (scope, opt, val) VALUES (?, ?, ?)" for opt in optMap.keys(): # Module option if ":" in opt: parts = opt.split(':') qvals = [ parts[0], parts[1], optMap[opt] ] else: # Global option qvals = [ "GLOBAL", opt, optMap[opt] ] try: self.dbh.execute(qry, qvals) except sqlite3.Error as e: self.sf.error("SQL error encountered when storing config, aborting: " + e.args[0]) self.conn.commit() # Retreive the config from the database def configGet(self): qry = "SELECT scope, opt, val FROM tbl_config" try: retval = dict() self.dbh.execute(qry) for [scope, opt, val] in self.dbh.fetchall(): if scope == "GLOBAL": retval[opt] = val else: retval[scope + ":" + opt] = val return retval 
except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching configuration: " + e.args[0]) # Reset the config to default (clear it from the DB and let the hard-coded # settings in the code take effect.) def configClear(self): qry = "DELETE from tbl_config" try: self.dbh.execute(qry) self.conn.commit() except sqlite3.Error as e: self.sf.error("Unable to clear configuration from the database: " + e.args[0]) # Store a configuration value for a scan def scanConfigSet(self, id, optMap=dict()): qry = "REPLACE INTO tbl_scan_config \ (scan_instance_id, component, opt, val) VALUES (?, ?, ?, ?)" for opt in optMap.keys(): # Module option if ":" in opt: parts = opt.split(':') qvals = [ id, parts[0], parts[1], optMap[opt] ] else: # Global option qvals = [ id, "GLOBAL", opt, optMap[opt] ] try: self.dbh.execute(qry, qvals) except sqlite3.Error as e: self.sf.error("SQL error encountered when storing config, aborting: " + e.args[0]) self.conn.commit() # Retreive configuration data for a scan component def scanConfigGet(self, instanceId): qry = "SELECT component, opt, val FROM tbl_scan_config \ WHERE scan_instance_id = ? ORDER BY component, opt" qvars = [instanceId] try: retval = dict() self.dbh.execute(qry, qvars) for [component, opt, val] in self.dbh.fetchall(): if component == "GLOBAL": retval[opt] = val else: retval[component + ":" + opt] = val return retval except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching configuration: " + e.args[0]) # Store an event # eventData is a SpiderFootEvent object with the following variables: # - eventType: the event, e.g. URL_FORM, RAW_DATA, etc. # - generated: time the event occurred # - confidence: how sure are we of this data's validity, 0-100 # - visibility: how 'visible' was this data, 0-100 # - risk: how much risk does this data represent, 0-100 # - module: module that generated the event # - data: the actual data, i.e. a URL, port number, webpage content, etc. 
# - sourceEventHash: hash of the event that triggered this event # And getHash() will return the event hash. def scanEventStore(self, instanceId, sfEvent, truncateSize=0): storeData = '' if type(sfEvent.data) is not unicode: # If sfEvent.data is a dict or list, convert it to a string first, as # those types do not have a unicode converter. if type(sfEvent.data) is str: storeData = unicode(sfEvent.data, 'utf-8', errors='replace') else: try: storeData = unicode(str(sfEvent.data), 'utf-8', errors='replace') except BaseException as e: self.sf.fatal("Unhandled type detected: " + str(type(sfEvent.data))) else: storeData = sfEvent.data if truncateSize > 0: storeData = storeData[0:truncateSize] if sfEvent.sourceEventHash in [ "", None]: self.sf.fatal("UNABLE TO CREATE RECORD WITH EMPTY SOURCE EVENT HASH!") qry = "INSERT INTO tbl_scan_results \ (scan_instance_id, hash, type, generated, confidence, \ visibility, risk, module, data, source_event_hash) \ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" qvals = [ instanceId, sfEvent.getHash(), sfEvent.eventType, sfEvent.generated, sfEvent.confidence, sfEvent.visibility, sfEvent.risk, sfEvent.module, storeData, sfEvent.sourceEventHash ] #print "STORING: " + str(qvals) try: self.dbh.execute(qry, qvals) self.conn.commit() return None except sqlite3.Error as e: self.sf.fatal("SQL error encountered when storing event data (" + str(self.dbh) + ": " + e.args[0]) # List of all previously run scans def scanInstanceList(self): # SQLite doesn't support OUTER JOINs, so we need a work-around that # does a UNION of scans with results and scans without results to # get a complete listing. 
qry = "SELECT i.guid, i.name, i.seed_target, ROUND(i.created/1000), \ ROUND(i.started)/1000 as started, ROUND(i.ended)/1000, i.status, COUNT(r.type) \ FROM tbl_scan_instance i, tbl_scan_results r WHERE i.guid = r.scan_instance_id \ GROUP BY i.guid \ UNION ALL \ SELECT i.guid, i.name, i.seed_target, ROUND(i.created/1000), \ ROUND(i.started)/1000 as started, ROUND(i.ended)/1000, i.status, '0' \ FROM tbl_scan_instance i WHERE i.guid NOT IN ( \ SELECT distinct scan_instance_id FROM tbl_scan_results) \ ORDER BY started DESC" try: self.dbh.execute(qry) return self.dbh.fetchall() except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching scan list: " + e.args[0]) # History of data from the scan def scanResultHistory(self, instanceId): qry = "SELECT STRFTIME('%H:%M %w', generated, 'unixepoch') AS hourmin, \ type, COUNT(*) FROM tbl_scan_results \ WHERE scan_instance_id = ? GROUP BY hourmin, type" qvars = [instanceId] try: self.dbh.execute(qry, qvars) return self.dbh.fetchall() except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching scan history: " + e.args[0]) # Get the source IDs, types and data for a set of IDs def scanElementSources(self, instanceId, elementIdList): # the output of this needs to be aligned with scanResultEvent, # as other functions call both expecting the same output. qry = "SELECT ROUND(c.generated) AS generated, c.data, \ s.data as 'source_data', \ c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \ c.source_event_hash, t.event_descr \ FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \ WHERE c.scan_instance_id = ? 
AND c.source_event_hash = s.hash AND \ s.scan_instance_id = c.scan_instance_id AND \ t.event = c.type AND c.hash in (" qvars = [instanceId] for hashId in elementIdList: qry = qry + "'" + hashId + "'," qry = qry + "'')" try: self.dbh.execute(qry, qvars) return self.dbh.fetchall() except sqlite3.Error as e: self.sf.error("SQL error encountered when getting source element IDs: " + e.args[0])
class SpiderFootScanner:
    """Orchestrates a single SpiderFoot scan: loads the selected modules,
    wires them up as listeners of each other, runs them sequentially and
    tracks the scan's status in the back-end database.

    NOTE(review): the class-level attributes below are shared defaults;
    startScan() rebinds moduleInstances per run, which presumes only one
    scan runs at a time per process — confirm against callers.
    """
    # modName -> instantiated module object for the currently running scan
    moduleInstances = None
    # Lifecycle: UNKNOWN -> STARTING -> RUNNING -> FINISHED/ABORTED/ERROR-FAILED
    status = "UNKNOWN"
    # GUID of the scan this instance is running (set in startScan)
    myId = None

    def __init__(self, name, target, moduleList, globalOpts, moduleOpts):
        """Store the scan parameters; no work is done until startScan().

        name: user-supplied scan name.
        target: seed target (host, IP, email, etc.).
        moduleList: names of the modules to run.
        globalOpts: global configuration dict (deep-copied so this scan's
        mutations don't leak back to the caller).
        moduleOpts: currently unused here — presumably per-module option
        overrides; TODO confirm.
        """
        self.config = deepcopy(globalOpts)
        self.sf = SpiderFoot(self.config)
        self.target = target
        self.moduleList = moduleList
        self.name = name
        return

    # Status of the currently running scan (if any)
    def scanStatus(self, id):
        """Return this scanner's status string, or UNKNOWN if id is not ours."""
        if id != self.myId:
            return "UNKNOWN"
        return self.status

    # Stop a scan (id variable is unnecessary for now given that only one simultaneous
    # scan is permitted.)
    def stopScan(self, id):
        """Ask every loaded module to stop scanning. No-op for a foreign id
        or if no scan has been started."""
        if id != self.myId:
            return None
        if self.moduleInstances == None:
            return None
        for modName in self.moduleInstances.keys():
            self.moduleInstances[modName].stopScanning()

    # Start running a scan
    def startScan(self):
        """Run the scan to completion (blocking).

        Creates the scan instance in the DB, applies global options (SOCKS
        proxy, DNS override, user agent, TLD list), loads and cross-registers
        the modules, then starts each module sequentially. Status transitions
        are persisted via dbh.scanInstanceSet throughout.
        """
        self.moduleInstances = dict()
        dbh = SpiderFootDb(self.config)
        self.sf.setDbh(dbh)
        aborted = False

        # Create a unique ID for this scan and create it in the back-end DB.
        self.config['__guid__'] = dbh.scanInstanceGenGUID(self.target)
        self.sf.setScanId(self.config['__guid__'])
        self.myId = self.config['__guid__']
        dbh.scanInstanceCreate(self.config['__guid__'], self.name, self.target)
        # Timestamps are stored in milliseconds.
        dbh.scanInstanceSet(self.config['__guid__'], time.time() * 1000, None, 'STARTING')
        self.status = "STARTING"

        # Save the config current set for this scan
        self.config['_modulesenabled'] = self.moduleList
        dbh.scanConfigSet(self.config['__guid__'], self.sf.configSerialize(self.config))

        self.sf.status("Scan [" + self.config['__guid__'] + "] initiated.")
        # moduleList = list of modules the user wants to run
        try:
            # Process global options that point to other places for data

            # If a SOCKS server was specified, set it up
            if self.config['_socks1type'] != '':
                # SOCKS4 is the default; refined below by the _socks1type value.
                socksType = socks.PROXY_TYPE_SOCKS4
                socksDns = self.config['_socks6dns']
                socksAddr = self.config['_socks2addr']
                socksPort = int(self.config['_socks3port'])
                socksUsername = ''
                socksPassword = ''

                if self.config['_socks1type'] == '4':
                    socksType = socks.PROXY_TYPE_SOCKS4
                if self.config['_socks1type'] == '5':
                    # Only SOCKS5 supports authentication credentials.
                    socksType = socks.PROXY_TYPE_SOCKS5
                    socksUsername = self.config['_socks4user']
                    socksPassword = self.config['_socks5pwd']
                if self.config['_socks1type'] == 'HTTP':
                    socksType = socks.PROXY_TYPE_HTTP

                self.sf.debug("SOCKS: " + socksAddr + ":" + str(socksPort) + \
                              "(" + socksUsername + ":" + socksPassword + ")")
                socks.setdefaultproxy(socksType, socksAddr, socksPort,
                                      socksDns, socksUsername, socksPassword)

                # Override the default socket and getaddrinfo calls with the
                # SOCKS ones
                # NOTE: this monkey-patches the process-wide socket module.
                socket.socket = socks.socksocket
                socket.create_connection = socks.create_connection
                socket.getaddrinfo = socks.getaddrinfo
                self.sf.updateSocket(socket)

            # Override the default DNS server
            if self.config['_dnsserver'] != "":
                res = dns.resolver.Resolver()
                res.nameservers = [ self.config['_dnsserver'] ]
                dns.resolver.override_system_resolver(res)
            else:
                dns.resolver.restore_system_resolver()

            # Set the user agent
            self.config['_useragent'] = self.sf.optValueToData(self.config['_useragent'])

            # Get internet TLDs
            tlddata = self.sf.cacheGet("internet_tlds", self.config['_internettlds_cache'])
            # If it wasn't loadable from cache, load it from scratch
            if tlddata == None:
                self.config['_internettlds'] = self.sf.optValueToData(self.config['_internettlds'])
                self.sf.cachePut("internet_tlds", self.config['_internettlds'])
            else:
                self.config["_internettlds"] = tlddata.splitlines()

            # Dynamically import and instantiate each requested module.
            for modName in self.moduleList:
                if modName == '':
                    continue

                module = __import__('modules.' + modName, globals(), locals(), [modName])
                mod = getattr(module, modName)()
                mod.__name__ = modName

                # A bit hacky: we pass the database object as part of the config. This
                # object should only be used by the internal SpiderFoot modules writing
                # to the database, which at present is only sfp__stor_db.
                # Individual modules cannot create their own SpiderFootDb instance or
                # we'll get database locking issues, so it all goes through this.
                self.config['__sfdb__'] = dbh

                # Set up the module
                # Configuration is a combined global config with module-specific options
                #modConfig = deepcopy(self.config)
                modConfig = self.config['__modules__'][modName]['opts']
                for opt in self.config.keys():
                    modConfig[opt] = self.config[opt]

                mod.clearListeners()  # clear any listener relationships from the past
                mod.setup(self.sf, self.target, modConfig)
                self.moduleInstances[modName] = mod

                # Override the module's local socket module
                # to be the SOCKS one.
                if self.config['_socks1type'] != '':
                    mod._updateSocket(socket)

                self.sf.status(modName + " module loaded.")

            # Register listener modules and then start all modules sequentially
            for module in self.moduleInstances.values():
                for listenerModule in self.moduleInstances.values():
                    # Careful not to register twice or you will get duplicate events
                    if listenerModule in module._listenerModules:
                        continue
                    # Note the absence of a check for whether a module can register
                    # to itself. That is intentional because some modules will
                    # act on their own notifications (e.g. sfp_dns)!
                    if listenerModule.watchedEvents() != None:
                        module.registerListener(listenerModule)

            dbh.scanInstanceSet(self.config['__guid__'], status='RUNNING')
            self.status = "RUNNING"

            # Create the "ROOT" event which un-triggered modules will link events to
            rootEvent = SpiderFootEvent("INITIAL_TARGET", self.target, "SpiderFoot UI")
            dbh.scanEventStore(self.config['__guid__'], rootEvent)

            # Start the modules sequentially.
            for module in self.moduleInstances.values():
                # Check in case the user requested to stop the scan between modules initializing
                if module.checkForStop():
                    dbh.scanInstanceSet(self.config['__guid__'], status='ABORTING')
                    self.status = "ABORTING"
                    aborted = True
                    break

                # Many modules' start() method will return None, as most will rely on
                # notifications during the scan from other modules.
                module.start()

            # Check if any of the modules ended due to being stopped
            for module in self.moduleInstances.values():
                if module.checkForStop():
                    aborted = True

            if aborted:
                self.sf.status("Scan [" + self.config['__guid__'] + "] aborted.")
                dbh.scanInstanceSet(self.config['__guid__'], None, time.time() * 1000, 'ABORTED')
                self.status = "ABORTED"
            else:
                self.sf.status("Scan [" + self.config['__guid__'] + "] completed.")
                dbh.scanInstanceSet(self.config['__guid__'], None, time.time() * 1000, 'FINISHED')
                self.status = "FINISHED"
        except BaseException as e:
            # Catch-all so the scan record is never left in a running state;
            # the failure is logged and persisted as ERROR-FAILED.
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self.sf.error("Unhandled exception (" + e.__class__.__name__ + ") " + \
                          "encountered during scan. Please report this as a bug: " + \
                          repr(traceback.format_exception(exc_type, exc_value, exc_traceback)), False)
            self.sf.status("Scan [" + self.config['__guid__'] + "] failed: " + str(e))
            dbh.scanInstanceSet(self.config['__guid__'], None, time.time() * 1000, 'ERROR-FAILED')
            self.status = "ERROR-FAILED"

        # Tear down regardless of outcome.
        self.moduleInstances = None
        dbh.close()
        self.sf.setDbh(None)
        self.sf.setScanId(None)
def test_target_type(self): """ Test targetType(self, target) """ sf = SpiderFoot(dict()) target_type = sf.targetType("0.0.0.0") self.assertEqual('IP_ADDRESS', target_type) target_type = sf.targetType("*****@*****.**") self.assertEqual('EMAILADDR', target_type) target_type = sf.targetType("0.0.0.0/0") self.assertEqual('NETBLOCK_OWNER', target_type) target_type = sf.targetType("+1234567890") self.assertEqual('PHONE_NUMBER', target_type) target_type = sf.targetType('"Human Name"') self.assertEqual('HUMAN_NAME', target_type) target_type = sf.targetType('"abc123"') self.assertEqual('USERNAME', target_type) target_type = sf.targetType("1234567890") self.assertEqual('BGP_AS_OWNER', target_type) target_type = sf.targetType("::1") self.assertEqual('IPV6_ADDRESS', target_type) target_type = sf.targetType("spiderfoot.net") self.assertEqual('INTERNET_NAME', target_type)
def setup_server():
    """Build the default configuration, discover and load all sfp_* modules,
    then mount the SpiderFoot web UI on the CherryPy tree.

    Raises ValueError if any module declares more than one category.
    """
    default_config = {
        '_debug': False,  # Debug
        '__logging': True,  # Logging in general
        '__outputfilter': None,  # Event types to filter from modules' output
        # User-Agent to use for HTTP requests
        '_useragent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:62.0) Gecko/20100101 Firefox/62.0',
        '_dnsserver': '',  # Override the default resolver
        '_fetchtimeout': 5,  # number of seconds before giving up on a fetch
        '_internettlds': 'https://publicsuffix.org/list/effective_tld_names.dat',
        '_internettlds_cache': 72,
        '_genericusers': "abuse,admin,billing,compliance,devnull,dns,ftp,hostmaster,inoc,ispfeedback,ispsupport,list-request,list,maildaemon,marketing,noc,no-reply,noreply,null,peering,peering-notify,peering-request,phish,phishing,postmaster,privacy,registrar,registry,root,routing-registry,rr,sales,security,spam,support,sysadmin,tech,undisclosed-recipients,unsubscribe,usenet,uucp,webmaster,www",
        '__database': 'spiderfoot.test.db',  # note: test database file
        '__modules__': None,  # List of modules. Will be set after start-up.
        '_socks1type': '',
        '_socks2addr': '',
        '_socks3port': '',
        '_socks4user': '',
        '_socks5pwd': '',
        '_torctlport': 9051,
        '__logstdout': False
    }

    default_web_config = {'root': '/'}

    sfModules = dict()
    sf = SpiderFoot(default_config)
    mod_dir = sf.myPath() + '/modules/'

    for filename in os.listdir(mod_dir):
        # Only consider SpiderFoot plugin files.
        if not filename.startswith("sfp_"):
            continue
        if not filename.endswith(".py"):
            continue
        # Skip the module template and debugging modules
        if filename in ('sfp_template.py', 'sfp_stor_print.py'):
            continue

        modName = filename.split('.')[0]

        # Load and instantiate the module
        entry = sfModules[modName] = dict()
        pymod = __import__('modules.' + modName, globals(), locals(), [modName])
        mod_obj = getattr(pymod, modName)()
        meta = mod_obj.meta

        entry['object'] = mod_obj
        entry['name'] = meta['name']
        entry['cats'] = meta.get('categories', list())
        entry['group'] = meta.get('useCases', list())
        if len(entry['cats']) > 1:
            raise ValueError(
                f"Module {modName} has multiple categories defined but only one is supported."
            )
        entry['labels'] = meta.get('flags', list())
        entry['descr'] = meta['summary']
        entry['provides'] = mod_obj.producedEvents()
        entry['consumes'] = mod_obj.watchedEvents()
        entry['meta'] = meta
        # opts/optdescs are optional on a module.
        if hasattr(mod_obj, 'opts'):
            entry['opts'] = mod_obj.opts
        if hasattr(mod_obj, 'optdescs'):
            entry['optdescs'] = mod_obj.optdescs

    default_config['__modules__'] = sfModules

    conf = {
        '/query': {
            'tools.encode.text_only': False,
            'tools.encode.add_charset': True,
        },
        '/static': {
            'tools.staticdir.on': True,
            'tools.staticdir.dir': 'static',
            'tools.staticdir.root': sf.myPath()
        }
    }

    cherrypy.tree.mount(SpiderFootWebUi(default_web_config, default_config),
                        script_name=default_web_config.get('root'),
                        config=conf)
class SpiderFootDb: sf = None dbh = None conn = None # Queries for creating the SpiderFoot database createQueries = [ "PRAGMA journal_mode=WAL", "CREATE TABLE tbl_event_types ( \ event VARCHAR NOT NULL PRIMARY KEY, \ event_descr VARCHAR NOT NULL, \ event_raw INT NOT NULL DEFAULT 0, \ event_type VARCHAR NOT NULL \ )", "CREATE TABLE tbl_config ( \ scope VARCHAR NOT NULL, \ opt VARCHAR NOT NULL, \ val VARCHAR NOT NULL, \ PRIMARY KEY (scope, opt) \ )", "CREATE TABLE tbl_scan_instance ( \ guid VARCHAR NOT NULL PRIMARY KEY, \ name VARCHAR NOT NULL, \ seed_target VARCHAR NOT NULL, \ created INT DEFAULT 0, \ started INT DEFAULT 0, \ ended INT DEFAULT 0, \ status VARCHAR NOT NULL \ )", "CREATE TABLE tbl_scan_log ( \ scan_instance_id VARCHAR NOT NULL REFERENCES tbl_scan_instance(guid), \ generated INT NOT NULL, \ component VARCHAR, \ type VARCHAR NOT NULL, \ message VARCHAR \ )", "CREATE TABLE tbl_scan_config ( \ scan_instance_id VARCHAR NOT NULL REFERENCES tbl_scan_instance(guid), \ component VARCHAR NOT NULL, \ opt VARCHAR NOT NULL, \ val VARCHAR NOT NULL \ )", "CREATE TABLE tbl_scan_results ( \ scan_instance_id VARCHAR NOT NULL REFERENCES tbl_scan_instance(guid), \ hash VARCHAR NOT NULL, \ type VARCHAR NOT NULL REFERENCES tbl_event_types(event), \ generated INT NOT NULL, \ confidence INT NOT NULL DEFAULT 100, \ visibility INT NOT NULL DEFAULT 100, \ risk INT NOT NULL DEFAULT 0, \ module VARCHAR NOT NULL, \ data VARCHAR, \ false_positive INT NOT NULL DEFAULT 0, \ source_event_hash VARCHAR DEFAULT 'ROOT' \ )", "CREATE INDEX idx_scan_results_id ON tbl_scan_results (scan_instance_id)", "CREATE INDEX idx_scan_results_type ON tbl_scan_results (scan_instance_id, type)", "CREATE INDEX idx_scan_results_hash ON tbl_scan_results (scan_instance_id, hash)", "CREATE INDEX idx_scan_results_srchash ON tbl_scan_results (scan_instance_id, source_event_hash)", "CREATE INDEX idx_scan_logs ON tbl_scan_log (scan_instance_id)", "INSERT INTO tbl_event_types (event, event_descr, event_raw, 
event_type) VALUES ('ROOT', 'Internal SpiderFoot Root event', 1, 'INTERNAL')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('ACCOUNT_EXTERNAL_OWNED', 'Account on External Site', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('ACCOUNT_EXTERNAL_OWNED_COMPROMISED', 'Hacked Account on External Site', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('ACCOUNT_EXTERNAL_USER_SHARED', 'User Account on External Site', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('ACCOUNT_EXTERNAL_USER_SHARED_COMPROMISED', 'Hacked User Account on External Site', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AFFILIATE_INTERNET_NAME', 'Affiliate - Internet Name', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AFFILIATE_IPADDR', 'Affiliate - IP Address', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AFFILIATE_WEB_CONTENT', 'Affiliate - Web Content', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AFFILIATE_DESCRIPTION_CATEGORY', 'Affiliate Description - Category', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AFFILIATE_DESCRIPTION_ABSTRACT', 'Affiliate Description - Abstract', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('APPSTORE_ENTRY', 'App Store Entry', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AMAZON_S3_BUCKET', 'Amazon S3 Bucket', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BASE64_DATA', 'Base64-encoded Data', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, 
event_type) VALUES ('BGP_AS_OWNER', 'BGP AS Ownership', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BGP_AS_MEMBER', 'BGP AS Membership', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BGP_AS_PEER', 'BGP AS Peer', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BLACKLISTED_IPADDR', 'Blacklisted IP Address', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BLACKLISTED_AFFILIATE_IPADDR', 'Blacklisted Affiliate IP Address', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BLACKLISTED_SUBNET', 'Blacklisted IP on Same Subnet', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BLACKLISTED_NETBLOCK', 'Blacklisted IP on Owned Netblock', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('CO_HOSTED_SITE', 'Co-Hosted Site', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DARKNET_MENTION_URL', 'Darknet Mention URL', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DARKNET_MENTION_CONTENT', 'Darknet Mention Web Content', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DEFACED_INTERNET_NAME', 'Defaced', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DEFACED_IPADDR', 'Defaced IP Address', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DEFACED_AFFILIATE_INTERNET_NAME', 'Defaced Affiliate', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DEFACED_COHOST', 'Defaced Co-Hosted Site', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types 
(event, event_descr, event_raw, event_type) VALUES ('DEFACED_AFFILIATE_IPADDR', 'Defaced Affiliate IP Address', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DESCRIPTION_CATEGORY', 'Description - Category', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DESCRIPTION_ABSTRACT', 'Description - Abstract', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DEVICE_TYPE', 'Device Type', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DNS_TEXT', 'DNS TXT Record', 0, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DOMAIN_NAME', 'Domain Name', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DOMAIN_NAME_PARENT', 'Domain Name (Parent)', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DOMAIN_REGISTRAR', 'Domain Registrar', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DOMAIN_WHOIS', 'Domain Whois', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('EMAILADDR', 'Email Address', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('EMAILADDR_COMPROMISED', 'Hacked Email Address', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('ERROR_MESSAGE', 'Error Message', 0, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('GEOINFO', 'Physical Location', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('HTTP_CODE', 'HTTP Status Code', 0, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('HUMAN_NAME', 'Human Name', 0, 
'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('INTERESTING_FILE', 'Interesting File', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('INTERESTING_FILE_HISTORIC', 'Historic Interesting File', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('JUNK_FILE', 'Junk File', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('INTERNET_NAME', 'Internet Name', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('IP_ADDRESS', 'IP Address', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('IPV6_ADDRESS', 'IPv6 Address', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('LINKED_URL_INTERNAL', 'Linked URL - Internal', 0, 'SUBENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('LINKED_URL_EXTERNAL', 'Linked URL - External', 0, 'SUBENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_ASN', 'Malicious AS', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_IPADDR', 'Malicious IP Address', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_COHOST', 'Malicious Co-Hosted Site', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_EMAILADDR', 'Malicious E-mail Address', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_INTERNET_NAME', 'Malicious Internet Name', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_AFFILIATE_INTERNET_NAME', 'Malicious Affiliate', 0, 
'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_AFFILIATE_IPADDR', 'Malicious Affiliate IP Address', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_NETBLOCK', 'Malicious IP on Owned Netblock', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_SUBNET', 'Malicious IP on Same Subnet', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('NETBLOCK_OWNER', 'Netblock Ownership', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('NETBLOCK_MEMBER', 'Netblock Membership', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('NETBLOCK_WHOIS', 'Netblock Whois', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('OPERATING_SYSTEM', 'Operating System', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('LEAKSITE_URL', 'Leak Site URL', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('LEAKSITE_CONTENT', 'Leak Site Content', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PHONE_NUMBER', 'Phone Number', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PHYSICAL_ADDRESS', 'Physical Address', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PHYSICAL_COORDINATES', 'Physical Coordinates', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PGP_KEY', 'PGP Public Key', 0, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PROVIDER_DNS', 'Name Server (DNS ''NS'' Records)', 0, 'ENTITY')", "INSERT INTO 
tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PROVIDER_JAVASCRIPT', 'Externally Hosted Javascript', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PROVIDER_MAIL', 'Email Gateway (DNS ''MX'' Records)', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PROVIDER_HOSTING', 'Hosting Provider', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PUBLIC_CODE_REPO', 'Public Code Repository', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('RAW_RIR_DATA', 'Raw Data from RIRs', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('RAW_DNS_RECORDS', 'Raw DNS Records', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('RAW_FILE_META_DATA', 'Raw File Meta Data', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SEARCH_ENGINE_WEB_CONTENT', 'Search Engine''s Web Content', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SOCIAL_MEDIA', 'Social Media Presence', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SIMILARDOMAIN', 'Similar Domain', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SOFTWARE_USED', 'Software Used', 0, 'SUBENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_RAW', 'SSL Certificate - Raw Data', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_ISSUED', 'SSL Certificate - Issued to', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_ISSUER', 'SSL Certificate - Issued by', 0, 'ENTITY')", "INSERT INTO 
tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_MISMATCH', 'SSL Certificate Host Mismatch', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_EXPIRED', 'SSL Certificate Expired', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_EXPIRING', 'SSL Certificate Expiring', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('TARGET_WEB_CONTENT', 'Web Content', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('TARGET_WEB_COOKIE', 'Cookies', 0, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('TCP_PORT_OPEN', 'Open TCP Port', 0, 'SUBENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('TCP_PORT_OPEN_BANNER', 'Open TCP Port Banner', 0, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('UDP_PORT_OPEN', 'Open UDP Port', 0, 'SUBENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('UDP_PORT_OPEN_INFO', 'Open UDP Port Information', 0, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_ADBLOCKED_EXTERNAL', 'URL (AdBlocked External)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_ADBLOCKED_INTERNAL', 'URL (AdBlocked Internal)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_FORM', 'URL (Form)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_FLASH', 'URL (Uses Flash)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_JAVASCRIPT', 'URL (Uses Javascript)', 0, 'DESCRIPTOR')", "INSERT INTO 
tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_WEB_FRAMEWORK', 'URL (Uses a Web Framework)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_JAVA_APPLET', 'URL (Uses Java Applet)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_STATIC', 'URL (Purely Static)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_PASSWORD', 'URL (Accepts Passwords)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_UPLOAD', 'URL (Accepts Uploads)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_FORM_HISTORIC', 'Historic URL (Form)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_FLASH_HISTORIC', 'Historic URL (Uses Flash)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_JAVASCRIPT_HISTORIC', 'Historic URL (Uses Javascript)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_WEB_FRAMEWORK_HISTORIC', 'Historic URL (Uses a Web Framework)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_JAVA_APPLET_HISTORIC', 'Historic URL (Uses Java Applet)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_STATIC_HISTORIC', 'Historic URL (Purely Static)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_PASSWORD_HISTORIC', 'Historic URL (Accepts Passwords)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_UPLOAD_HISTORIC', 'Historic URL (Accepts Uploads)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, 
event_descr, event_raw, event_type) VALUES ('USERNAME', 'Username', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('VULNERABILITY', 'Vulnerability in Public Domain', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('WEBSERVER_BANNER', 'Web Server', 0, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('WEBSERVER_HTTPHEADERS', 'HTTP Headers', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('WEBSERVER_STRANGEHEADER', 'Non-Standard HTTP Header', 0, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('WEBSERVER_TECHNOLOGY', 'Web Technology', 0, 'DESCRIPTOR')" ] def __init__(self, opts): self.sf = SpiderFoot(opts) # connect() will create the database file if it doesn't exist, but # at least we can use this opportunity to ensure we have permissions to # read and write to such a file. dbh = sqlite3.connect(self.sf.myPath() + "/" + opts['__database'], timeout=10) if dbh is None: self.sf.fatal("Could not connect to internal database, and couldn't create " + opts['__database']) dbh.text_factory = str self.conn = dbh self.dbh = dbh.cursor() # Now we actually check to ensure the database file has the schema set # up correctly. try: self.dbh.execute('SELECT COUNT(*) FROM tbl_scan_config') self.conn.create_function("REGEXP", 2, __dbregex__) except sqlite3.Error: # .. If not set up, we set it up. 
try: self.create() except BaseException as e: self.sf.error("Tried to set up the SpiderFoot database schema, but failed: " + e.args[0]) return # # Back-end database operations # # Create the back-end schema def create(self): try: for qry in self.createQueries: self.dbh.execute(qry) self.conn.commit() except sqlite3.Error as e: raise BaseException("SQL error encountered when setting up database: " + e.args[0]) # Close the database handle def close(self): self.dbh.close() # Search results # criteria is search criteria such as: # - scan_id (search within a scan, if omitted search all) # - type (search a specific type, if omitted search all) # - value (search values for a specific string, if omitted search all) # - regex (search values for a regular expression) # ** at least two criteria must be set ** def search(self, criteria, filterFp=False): if criteria.values().count(None) == 3: return False qvars = list() qry = "SELECT ROUND(c.generated) AS generated, c.data, \ s.data as 'source_data', \ c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \ c.source_event_hash, t.event_descr, t.event_type, c.scan_instance_id, \ c.false_positive as 'fp', s.false_positive as 'parent_fp' \ FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \ WHERE s.scan_instance_id = c.scan_instance_id AND \ t.event = c.type AND c.source_event_hash = s.hash " if filterFp: qry += " AND c.false_positive <> 1 " if criteria.get('scan_id') is not None: qry += "AND c.scan_instance_id = ? " qvars.append(criteria['scan_id']) if criteria.get('type') is not None: qry += " AND c.type = ? " qvars.append(criteria['type']) if criteria.get('value') is not None: qry += " AND (c.data LIKE ? OR s.data LIKE ?) " qvars.append(criteria['value']) qvars.append(criteria['value']) if criteria.get('regex') is not None: qry += " AND (c.data REGEXP ? OR s.data REGEXP ?) 
" qvars.append(criteria['regex']) qvars.append(criteria['regex']) qry += " ORDER BY c.data" try: #print qry #print str(qvars) self.dbh.execute(qry, qvars) return self.dbh.fetchall() except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching search results: " + e.args[0]) # Get event types def eventTypes(self): qry = "SELECT event_descr, event, event_raw, event_type FROM tbl_event_types" try: self.dbh.execute(qry) return self.dbh.fetchall() except sqlite3.Error as e: self.sf.error("SQL error encountered when retreiving event types:" + e.args[0]) # Log an event to the database def scanLogEvent(self, instanceId, classification, message, component=None): if component is None: component = "SpiderFoot" qry = "INSERT INTO tbl_scan_log \ (scan_instance_id, generated, component, type, message) \ VALUES (?, ?, ?, ?, ?)" try: self.dbh.execute(qry, ( instanceId, time.time() * 1000, component, classification, message )) self.conn.commit() except sqlite3.Error as e: if "locked" in e.args[0]: # TODO: Do something smarter here to handle locked databases self.sf.fatal("Unable to log event in DB due to lock: " + e.args[0]) else: self.sf.fatal("Unable to log event in DB: " + e.args[0]) return True # Store a scan instance def scanInstanceCreate(self, instanceId, scanName, scanTarget): qry = "INSERT INTO tbl_scan_instance \ (guid, name, seed_target, created, status) \ VALUES (?, ?, ?, ?, ?)" try: self.dbh.execute(qry, ( instanceId, scanName, scanTarget, time.time() * 1000, 'CREATED' )) self.conn.commit() except sqlite3.Error as e: self.sf.fatal("Unable to create instance in DB: " + e.args[0]) return True # Update the start time, end time or status (or all 3) of a scan instance def scanInstanceSet(self, instanceId, started=None, ended=None, status=None): qvars = list() qry = "UPDATE tbl_scan_instance SET " if started is not None: qry += " started = ?," qvars.append(started) if ended is not None: qry += " ended = ?," qvars.append(ended) if status is not None: qry += " 
status = ?," qvars.append(status) # guid = guid is a little hack to avoid messing with , placement above qry += " guid = guid WHERE guid = ?" qvars.append(instanceId) try: self.dbh.execute(qry, qvars) self.conn.commit() except sqlite3.Error: self.sf.fatal("Unable to set information for the scan instance.") # Return info about a scan instance (name, target, created, started, # ended, status) - don't need this yet - untested def scanInstanceGet(self, instanceId): qry = "SELECT name, seed_target, ROUND(created/1000) AS created, \ ROUND(started/1000) AS started, ROUND(ended/1000) AS ended, status \ FROM tbl_scan_instance WHERE guid = ?" qvars = [instanceId] try: self.dbh.execute(qry, qvars) return self.dbh.fetchone() except sqlite3.Error as e: self.sf.error("SQL error encountered when retreiving scan instance:" + e.args[0]) # Obtain a summary of the results per event type def scanResultSummary(self, instanceId, by="type"): if by == "type": qry = "SELECT r.type, e.event_descr, MAX(ROUND(generated)) AS last_in, \ count(*) AS total, count(DISTINCT r.data) as utotal FROM \ tbl_scan_results r, tbl_event_types e WHERE e.event = r.type \ AND r.scan_instance_id = ? GROUP BY r.type ORDER BY e.event_descr" if by == "module": qry = "SELECT r.module, '', MAX(ROUND(generated)) AS last_in, \ count(*) AS total, count(DISTINCT r.data) as utotal FROM \ tbl_scan_results r, tbl_event_types e WHERE e.event = r.type \ AND r.scan_instance_id = ? GROUP BY r.module ORDER BY r.module DESC" if by == "entity": qry = "SELECT r.data, e.event_descr, MAX(ROUND(generated)) AS last_in, \ count(*) AS total, count(DISTINCT r.data) as utotal FROM \ tbl_scan_results r, tbl_event_types e WHERE e.event = r.type \ AND r.scan_instance_id = ? 
\ AND e.event_type in ('ENTITY') \ GROUP BY r.data, e.event_descr ORDER BY total DESC limit 50" qvars = [instanceId] try: self.dbh.execute(qry, qvars) return self.dbh.fetchall() except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching result summary: " + e.args[0]) # Obtain the data for a scan and event type def scanResultEvent(self, instanceId, eventType='ALL', filterFp=False): qry = "SELECT ROUND(c.generated) AS generated, c.data, \ s.data as 'source_data', \ c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \ c.source_event_hash, t.event_descr, t.event_type, s.scan_instance_id, \ c.false_positive as 'fp', s.false_positive as 'parent_fp' \ FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \ WHERE c.scan_instance_id = ? AND c.source_event_hash = s.hash AND \ s.scan_instance_id = c.scan_instance_id AND \ t.event = c.type" qvars = [instanceId] if eventType != "ALL": qry += " AND c.type = ?" qvars.append(eventType) if filterFp: qry += " AND c.false_positive <> 1" qry += " ORDER BY c.data" try: self.dbh.execute(qry, qvars) return self.dbh.fetchall() except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching result events: " + e.args[0]) # Obtain a unique list of elements def scanResultEventUnique(self, instanceId, eventType='ALL', filterFp=False): qry = "SELECT DISTINCT data, type, COUNT(*) FROM tbl_scan_results \ WHERE scan_instance_id = ?" qvars = [instanceId] if eventType != "ALL": qry += " AND type = ?" qvars.append(eventType) if filterFp: qry += " AND false_positive <> 1" qry += " GROUP BY type, data ORDER BY COUNT(*)" try: self.dbh.execute(qry, qvars) return self.dbh.fetchall() except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching unique result events: " + e.args[0]) # Get scan logs def scanLogs(self, instanceId, limit=None): qry = "SELECT generated AS generated, component, \ type, message FROM tbl_scan_log WHERE scan_instance_id = ? 
\ ORDER BY generated DESC" qvars = [instanceId] if limit is not None: qry += " LIMIT ?" qvars.append(limit) try: self.dbh.execute(qry, qvars) return self.dbh.fetchall() except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching scan logs: " + e.args[0]) # Get scan errors def scanErrors(self, instanceId, limit=None): qry = "SELECT generated AS generated, component, \ message FROM tbl_scan_log WHERE scan_instance_id = ? \ AND type = 'ERROR' ORDER BY generated DESC" qvars = [instanceId] if limit is not None: qry += " LIMIT ?" qvars.append(limit) try: self.dbh.execute(qry, qvars) return self.dbh.fetchall() except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching scan errors: " + e.args[0]) # Delete a scan instance def scanInstanceDelete(self, instanceId): qry1 = "DELETE FROM tbl_scan_instance WHERE guid = ?" qry2 = "DELETE FROM tbl_scan_config WHERE scan_instance_id = ?" qry3 = "DELETE FROM tbl_scan_results WHERE scan_instance_id = ?" qry4 = "DELETE FROM tbl_scan_log WHERE scan_instance_id = ?" qvars = [instanceId] try: self.dbh.execute(qry1, qvars) self.dbh.execute(qry2, qvars) self.dbh.execute(qry3, qvars) self.dbh.execute(qry4, qvars) self.conn.commit() except sqlite3.Error as e: self.sf.error("SQL error encountered when deleting scan: " + e.args[0]) # Set the false positive flag for a result def scanResultsUpdateFP(self, instanceId, resultHashes, fpFlag): for resultHash in resultHashes: qry = "UPDATE tbl_scan_results SET false_positive = ? WHERE \ scan_instance_id = ? AND hash = ?" 
qvars = [fpFlag, instanceId, resultHash] try: self.dbh.execute(qry, qvars) except sqlite3.Error as e: self.sf.error("SQL error encountered when updating F/P: " + e.args[0], False) return False self.conn.commit() return True # Store the default configuration def configSet(self, optMap=dict()): qry = "REPLACE INTO tbl_config (scope, opt, val) VALUES (?, ?, ?)" for opt in optMap.keys(): # Module option if ":" in opt: parts = opt.split(':') qvals = [parts[0], parts[1], optMap[opt]] else: # Global option qvals = ["GLOBAL", opt, optMap[opt]] try: self.dbh.execute(qry, qvals) except sqlite3.Error as e: self.sf.error("SQL error encountered when storing config, aborting: " + e.args[0]) self.conn.commit() # Retreive the config from the database def configGet(self): qry = "SELECT scope, opt, val FROM tbl_config" try: retval = dict() self.dbh.execute(qry) for [scope, opt, val] in self.dbh.fetchall(): if scope == "GLOBAL": retval[opt] = val else: retval[scope + ":" + opt] = val return retval except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching configuration: " + e.args[0]) # Reset the config to default (clear it from the DB and let the hard-coded # settings in the code take effect.) 
def configClear(self): qry = "DELETE from tbl_config" try: self.dbh.execute(qry) self.conn.commit() except sqlite3.Error as e: self.sf.error("Unable to clear configuration from the database: " + e.args[0]) # Store a configuration value for a scan def scanConfigSet(self, id, optMap=dict()): qry = "REPLACE INTO tbl_scan_config \ (scan_instance_id, component, opt, val) VALUES (?, ?, ?, ?)" for opt in optMap.keys(): # Module option if ":" in opt: parts = opt.split(':') qvals = [id, parts[0], parts[1], optMap[opt]] else: # Global option qvals = [id, "GLOBAL", opt, optMap[opt]] try: self.dbh.execute(qry, qvals) except sqlite3.Error as e: self.sf.error("SQL error encountered when storing config, aborting: " + e.args[0]) self.conn.commit() # Retreive configuration data for a scan component def scanConfigGet(self, instanceId): qry = "SELECT component, opt, val FROM tbl_scan_config \ WHERE scan_instance_id = ? ORDER BY component, opt" qvars = [instanceId] try: retval = dict() self.dbh.execute(qry, qvars) for [component, opt, val] in self.dbh.fetchall(): if component == "GLOBAL": retval[opt] = val else: retval[component + ":" + opt] = val return retval except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching configuration: " + e.args[0]) # Store an event # eventData is a SpiderFootEvent object with the following variables: # - eventType: the event, e.g. URL_FORM, RAW_DATA, etc. # - generated: time the event occurred # - confidence: how sure are we of this data's validity, 0-100 # - visibility: how 'visible' was this data, 0-100 # - risk: how much risk does this data represent, 0-100 # - module: module that generated the event # - data: the actual data, i.e. a URL, port number, webpage content, etc. # - sourceEventHash: hash of the event that triggered this event # And getHash() will return the event hash. 
def scanEventStore(self, instanceId, sfEvent, truncateSize=0): storeData = '' if type(sfEvent.data) is not unicode: # If sfEvent.data is a dict or list, convert it to a string first, as # those types do not have a unicode converter. if type(sfEvent.data) is str: storeData = unicode(sfEvent.data, 'utf-8', errors='replace') else: try: storeData = unicode(str(sfEvent.data), 'utf-8', errors='replace') except BaseException as e: self.sf.fatal("Unhandled type detected: " + str(type(sfEvent.data))) else: storeData = sfEvent.data if truncateSize > 0: storeData = storeData[0:truncateSize] if sfEvent.sourceEventHash in ["", None]: self.sf.fatal("UNABLE TO CREATE RECORD WITH EMPTY SOURCE EVENT HASH!") qry = "INSERT INTO tbl_scan_results \ (scan_instance_id, hash, type, generated, confidence, \ visibility, risk, module, data, source_event_hash) \ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" qvals = [instanceId, sfEvent.getHash(), sfEvent.eventType, sfEvent.generated, sfEvent.confidence, sfEvent.visibility, sfEvent.risk, sfEvent.module, storeData, sfEvent.sourceEventHash] #print "STORING: " + str(qvals) try: self.dbh.execute(qry, qvals) self.conn.commit() return None except sqlite3.Error as e: self.sf.fatal("SQL error encountered when storing event data (" + str(self.dbh) + ": " + e.args[0]) # List of all previously run scans def scanInstanceList(self): # SQLite doesn't support OUTER JOINs, so we need a work-around that # does a UNION of scans with results and scans without results to # get a complete listing. 
qry = "SELECT i.guid, i.name, i.seed_target, ROUND(i.created/1000), \ ROUND(i.started)/1000 as started, ROUND(i.ended)/1000, i.status, COUNT(r.type) \ FROM tbl_scan_instance i, tbl_scan_results r WHERE i.guid = r.scan_instance_id \ AND r.type <> 'ROOT' GROUP BY i.guid \ UNION ALL \ SELECT i.guid, i.name, i.seed_target, ROUND(i.created/1000), \ ROUND(i.started)/1000 as started, ROUND(i.ended)/1000, i.status, '0' \ FROM tbl_scan_instance i WHERE i.guid NOT IN ( \ SELECT distinct scan_instance_id FROM tbl_scan_results WHERE type <> 'ROOT') \ ORDER BY started DESC" try: self.dbh.execute(qry) return self.dbh.fetchall() except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching scan list: " + e.args[0]) # History of data from the scan def scanResultHistory(self, instanceId): qry = "SELECT STRFTIME('%H:%M %w', generated, 'unixepoch') AS hourmin, \ type, COUNT(*) FROM tbl_scan_results \ WHERE scan_instance_id = ? GROUP BY hourmin, type" qvars = [instanceId] try: self.dbh.execute(qry, qvars) return self.dbh.fetchall() except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching scan history: " + e.args[0]) # Get the source IDs, types and data for a set of IDs def scanElementSourcesDirect(self, instanceId, elementIdList): # the output of this needs to be aligned with scanResultEvent, # as other functions call both expecting the same output. qry = "SELECT ROUND(c.generated) AS generated, c.data, \ s.data as 'source_data', \ c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \ c.source_event_hash, t.event_descr, t.event_type, s.scan_instance_id, \ c.false_positive as 'fp', s.false_positive as 'parent_fp' \ FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \ WHERE c.scan_instance_id = ? 
AND c.source_event_hash = s.hash AND \ s.scan_instance_id = c.scan_instance_id AND \ t.event = c.type AND c.hash in (" qvars = [instanceId] for hashId in elementIdList: qry = qry + "'" + hashId + "'," qry += "'')" try: self.dbh.execute(qry, qvars) return self.dbh.fetchall() except sqlite3.Error as e: self.sf.error("SQL error encountered when getting source element IDs: " + e.args[0]) # Get the child IDs, types and data for a set of IDs def scanElementChildrenDirect(self, instanceId, elementIdList): # the output of this needs to be aligned with scanResultEvent, # as other functions call both expecting the same output. qry = "SELECT ROUND(c.generated) AS generated, c.data, \ s.data as 'source_data', \ c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \ c.source_event_hash, t.event_descr, t.event_type, s.scan_instance_id, \ c.false_positive as 'fp', s.false_positive as 'parent_fp' \ FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \ WHERE c.scan_instance_id = ? AND c.source_event_hash = s.hash AND \ s.scan_instance_id = c.scan_instance_id AND \ t.event = c.type AND s.hash in (" qvars = [instanceId] for hashId in elementIdList: qry = qry + "'" + hashId + "'," qry += "'')" try: self.dbh.execute(qry, qvars) return self.dbh.fetchall() except sqlite3.Error as e: self.sf.error("SQL error encountered when getting child element IDs: " + e.args[0]) # Get the full set of upstream IDs which are parents to the # supplied set of IDs. # Data has to be in the format of output from scanElementSourcesDirect # and produce output in the same format. def scanElementSourcesAll(self, instanceId, childData): # Get the first round of source IDs for the leafs keepGoing = True nextIds = list() datamap = dict() pc = dict() for row in childData: # these must be unique values! 
parentId = row[9] childId = row[8] datamap[childId] = row if parentId in pc: if childId not in pc[parentId]: pc[parentId].append(childId) else: pc[parentId] = [childId] # parents of the leaf set if parentId not in nextIds: nextIds.append(parentId) while keepGoing: parentSet = self.scanElementSourcesDirect(instanceId, nextIds) nextIds = list() keepGoing = False for row in parentSet: parentId = row[9] childId = row[8] datamap[childId] = row #print childId + " = " + str(row) if parentId in pc: if childId not in pc[parentId]: pc[parentId].append(childId) else: pc[parentId] = [childId] if parentId not in nextIds: nextIds.append(parentId) # Prevent us from looping at root if parentId != "ROOT": keepGoing = True datamap[parentId] = row return [datamap, pc] # Get the full set of downstream IDs which are children of the # supplied set of IDs # NOTE FOR NOW THE BEHAVIOR IS NOT THE SAME AS THE scanElementParent* # FUNCTIONS - THIS ONLY RETURNS IDS!! def scanElementChildrenAll(self, instanceId, parentIds): datamap = list() keepGoing = True nextIds = list() nextSet = self.scanElementChildrenDirect(instanceId, parentIds) for row in nextSet: datamap.append(row[8]) for row in nextSet: if row[8] not in nextIds: nextIds.append(row[8]) while keepGoing: nextSet = self.scanElementChildrenDirect(instanceId, nextIds) if nextSet == None or len(nextSet) == 0: keepGoing = False break for row in nextSet: datamap.append(row[8]) nextIds = list() nextIds.append(row[8]) return datamap
p.add_argument("-q", action='store_true', help="Disable logging.")
args = p.parse_args()

# Always log to stdout when run from the command line.
sfConfig['__logstdout'] = True

if args.debug:
    sfConfig['_debug'] = True
else:
    sfConfig['_debug'] = False

if args.q:
    sfConfig['__logging'] = False

sfModules = dict()
sft = SpiderFoot(sfConfig)

# Go through each module in the modules directory with a .py extension
for filename in os.listdir(sft.myPath() + '/modules/'):
    if filename.startswith("sfp_") and filename.endswith(".py"):
        # Skip the module template and debugging modules
        if filename == "sfp_template.py" or filename == 'sfp_stor_print.py':
            continue
        modName = filename.split('.')[0]

        # Load and instantiate the module
        sfModules[modName] = dict()
        mod = __import__('modules.' + modName, globals(), locals(), [modName])
        sfModules[modName]['object'] = getattr(mod, modName)()
        # Module metadata is parsed from the class docstring; assumes the
        # format "Name:Category1,Category2:Group:..." -- TODO confirm against
        # the module template.
        sfModules[modName]['name'] = sfModules[modName]['object'].__doc__.split(":", 5)[0]
        sfModules[modName]['cats'] = sfModules[modName]['object'].__doc__.split(":", 5)[1].split(",")
        sfModules[modName]['group'] = sfModules[modName]['object'].__doc__.split(":", 5)[2]
def test_setup(self): sf = SpiderFoot(self.default_options) module = sfp_torexits() module.setup(sf, dict())
    '_socks2addr': 'SOCKS Server IP Address.',
    '_socks3port': 'SOCKS Server TCP Port. Usually 1080 for 4/5, 8080 for HTTP and 9050 for TOR.',
    '_socks4user': '******',
    '_socks5pwd': "SOCKS Password. Valid only for SOCKS5 servers.",
    '_socks6dns': "Resolve DNS through the SOCKS proxy? Has no affect when TOR is used: Will always be True.",
    '_torctlport': "The port TOR is taking control commands on. This is necessary for SpiderFoot to tell TOR to re-circuit when it suspects anonymity is compromised.",
    '_modulesenabled': "Modules enabled for the scan."  # This is a hack to get a description for an option not actually available.
}

if __name__ == '__main__':
    # Optional "ip:port" argument overrides the web UI bind address.
    if len(sys.argv) > 1:
        (addr, port) = sys.argv[1].split(":")
        sfConfig['__webaddr'] = addr
        sfConfig['__webport'] = int(port)

    sf = SpiderFoot(sfConfig)
    sfModules = dict()

    # Go through each module in the modules directory with a .py extension
    for filename in os.listdir(sf.myPath() + '/modules/'):
        if filename.startswith("sfp_") and filename.endswith(".py"):
            # Skip the module template and debugging modules
            if filename == "sfp_template.py" or filename == 'sfp_stor_print.py':
                continue
            modName = filename.split('.')[0]

            # Load and instantiate the module
            sfModules[modName] = dict()
            mod = __import__('modules.' + modName, globals(), locals(), [modName])
            sfModules[modName]['object'] = getattr(mod, modName)()
            # Module name is the first ':'-separated field of the docstring
            # -- presumably "Name:...", TODO confirm against module template.
            sfModules[modName]['name'] = sfModules[modName]['object'].__doc__.split(":", 2)[0]
def test_init(self): """ Test __init__(self, options, handle=None): """ sf = SpiderFoot(self.default_options) self.assertEqual('TBD', 'TBD')
def test_setup(self): sf = SpiderFoot(self.default_options) module = sfp_neutrinoapi() module.setup(sf, dict())
    '_socks2addr': 'SOCKS Server IP Address.',
    '_socks3port': 'SOCKS Server TCP Port. Usually 1080 for 4/5, 8080 for HTTP and 9050 for TOR.',
    '_socks4user': '******',
    '_socks5pwd': "SOCKS Password. Valid only for SOCKS5 servers.",
    '_socks6dns': "Resolve DNS through the SOCKS proxy? Has no affect when TOR is used: Will always be True.",
    '_torctlport': "The port TOR is taking control commands on. This is necessary for SpiderFoot to tell TOR to re-circuit when it suspects anonymity is compromised.",
    '_modulesenabled': "Modules enabled for the scan."  # This is a hack to get a description for an option not actually available.
}

if __name__ == '__main__':
    # Optional "ip:port" argument overrides the web UI bind address.
    if len(sys.argv) > 1:
        (addr, port) = sys.argv[1].split(":")
        sfConfig['__webaddr'] = addr
        sfConfig['__webport'] = int(port)

    sf = SpiderFoot(sfConfig)
    sfModules = dict()

    # Go through each module in the modules directory with a .py extension
    for filename in os.listdir(sf.myPath() + '/modules/'):
        if filename.startswith("sfp_") and filename.endswith(".py"):
            # Skip the module template and debugging modules
            if filename == "sfp_template.py" or filename == 'sfp_stor_print.py':
                continue
            modName = filename.split('.')[0]

            # Load and instantiate the module
            sfModules[modName] = dict()
            mod = __import__('modules.' + modName, globals(), locals(), [modName])
            sfModules[modName]['object'] = getattr(mod, modName)()
            # Module name is the first ':'-separated field of the docstring
            # -- presumably "Name:...", TODO confirm against module template.
            sfModules[modName]['name'] = sfModules[modName]['object'].__doc__.split(":", 5)[0]
def test_init_no_options(self): """ Test __init__(self, options, handle=None): """ sf = SpiderFoot(dict()) self.assertEqual('TBD', 'TBD')
def start_web_server(sfWebUiConfig, sfConfig):
    """Start the web server so you can start looking at results

    Args:
        sfWebUiConfig (dict): web server options
        sfConfig (dict): SpiderFoot config options
    """
    web_host = sfWebUiConfig.get('host', '127.0.0.1')
    web_port = sfWebUiConfig.get('port', 5001)
    web_root = sfWebUiConfig.get('root', '/')

    cherrypy.config.update({
        'log.screen': False,
        'server.socket_host': web_host,
        'server.socket_port': int(web_port)
    })

    log.info(f"Starting web server at {web_host}:{web_port} ...")

    # Disable auto-reloading of content
    cherrypy.engine.autoreload.unsubscribe()

    sf = SpiderFoot(sfConfig)

    # Enable access to static files via the web directory
    conf = {
        '/query': {
            'tools.encode.text_only': False,
            'tools.encode.add_charset': True,
        },
        '/static': {
            'tools.staticdir.on': True,
            'tools.staticdir.dir': 'static',
            'tools.staticdir.root': sf.myPath()
        }
    }

    passwd_file = sf.dataPath() + '/passwd'
    if os.path.isfile(passwd_file):
        if not os.access(passwd_file, os.R_OK):
            log.error("Could not read passwd file. Permission denied.")
            sys.exit(-1)

        secrets = dict()

        # Use a context manager so the passwd file handle is always closed
        # (it was previously opened and never closed).
        with open(passwd_file, 'r') as pw:
            for line in pw.readlines():
                if ':' not in line:
                    log.error(
                        "Incorrect format of passwd file, must be username:password on each line."
                    )
                    sys.exit(-1)

                u = line.strip().split(":")[0]
                # Everything after the first ':' is the password (passwords
                # may themselves contain ':').
                p = ':'.join(line.strip().split(":")[1:])

                if not u or not p:
                    log.error(
                        "Incorrect format of passwd file, must be username:password on each line."
                    )
                    sys.exit(-1)

                secrets[u] = p

        if secrets:
            log.info("Enabling authentication based on supplied passwd file.")
            conf['/'] = {
                'tools.auth_digest.on': True,
                'tools.auth_digest.realm': web_host,
                'tools.auth_digest.get_ha1': auth_digest.get_ha1_dict_plain(secrets),
                'tools.auth_digest.key': random.SystemRandom().randint(0, 99999999)
            }
        else:
            warn_msg = "\n********************************************************************\n"
            warn_msg += "Warning: passwd file contains no passwords. Authentication disabled.\n"
            warn_msg += "********************************************************************\n"
            log.warning(warn_msg)
    else:
        warn_msg = "\n********************************************************************\n"
        warn_msg += "Please consider adding authentication to protect this instance!\n"
        warn_msg += "Refer to https://www.spiderfoot.net/documentation/#security.\n"
        warn_msg += "********************************************************************\n"
        log.warning(warn_msg)

    if web_host == "0.0.0.0":  # nosec
        url = f"http://<IP of this host>:{web_port}{web_root}"
    else:
        url = f"http://{web_host}:{web_port}{web_root}"

    key_path = sf.dataPath() + '/spiderfoot.key'
    crt_path = sf.dataPath() + '/spiderfoot.crt'
    if os.path.isfile(key_path) and os.path.isfile(crt_path):
        if not os.access(crt_path, os.R_OK):
            log.critical(f"Could not read {crt_path} file. Permission denied.")
            sys.exit(-1)
        if not os.access(key_path, os.R_OK):
            log.critical(f"Could not read {key_path} file. Permission denied.")
            sys.exit(-1)
        log.info("Enabling SSL based on supplied key and certificate file.")
        cherrypy.server.ssl_module = 'builtin'
        cherrypy.server.ssl_certificate = crt_path
        cherrypy.server.ssl_private_key = key_path
        url = url.replace("http://", "https://")

    print("")
    print("*************************************************************")
    print(" Use SpiderFoot by starting your web browser of choice and ")
    print(f" browse to {url}")
    print("*************************************************************")
    print("")

    cherrypy.quickstart(SpiderFootWebUi(sfWebUiConfig, sfConfig), script_name=web_root, config=conf)
def main():
    # CLI entry point: parse arguments, load all sfp_* modules, then either
    # start the web server (-l) or run a command-line scan.
    if len(sys.argv) <= 1:
        print(
            "SpiderFoot requires -l <ip>:<port> to start the web server. Try --help for guidance."
        )
        sys.exit(-1)

    # web server config
    sfWebUiConfig = {'host': '127.0.0.1', 'port': 5001, 'root': '/'}

    # 'Global' configuration options
    # These can be overriden on a per-module basis, and some will
    # be overridden from saved configuration settings stored in the DB.
    sfConfig = {
        '_debug': False,  # Debug
        '__logging': True,  # Logging in general
        '__outputfilter': None,  # Event types to filter from modules' output
        '_useragent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:62.0) Gecko/20100101 Firefox/62.0',  # User-Agent to use for HTTP requests
        '_dnsserver': '',  # Override the default resolver
        '_fetchtimeout': 5,  # number of seconds before giving up on a fetch
        '_internettlds': 'https://publicsuffix.org/list/effective_tld_names.dat',
        '_internettlds_cache': 72,
        '_genericusers': "abuse,admin,billing,compliance,devnull,dns,ftp,hostmaster,inoc,ispfeedback,ispsupport,list-request,list,maildaemon,marketing,noc,no-reply,noreply,null,peering,peering-notify,peering-request,phish,phishing,postmaster,privacy,registrar,registry,root,routing-registry,rr,sales,security,spam,support,sysadmin,tech,undisclosed-recipients,unsubscribe,usenet,uucp,webmaster,www",
        '__version__': '3.3-DEV',
        '__database': 'spiderfoot.db',
        '__modules__': None,  # List of modules. Will be set after start-up.
        '_socks1type': '',
        '_socks2addr': '',
        '_socks3port': '',
        '_socks4user': '',
        '_socks5pwd': '',
        '_torctlport': 9051
    }

    # Human-readable descriptions for the global options (shown in the UI).
    sfOptdescs = {
        '_debug': "Enable debugging?",
        '_useragent': "User-Agent string to use for HTTP requests. Prefix with an '@' to randomly select the User Agent from a file containing user agent strings for each request, e.g. @C:\\useragents.txt or @/home/bob/useragents.txt. Or supply a URL to load the list from there.",
        '_dnsserver': "Override the default resolver with another DNS server. For example, 8.8.8.8 is Google's open DNS server.",
        '_fetchtimeout': "Number of seconds before giving up on a HTTP request.",
        '_internettlds': "List of Internet TLDs.",
        '_internettlds_cache': "Hours to cache the Internet TLD list. This can safely be quite a long time given that the list doesn't change too often.",
        '_genericusers': "List of usernames that if found as usernames or as part of e-mail addresses, should be treated differently to non-generics.",
        '_socks1type': "SOCKS Server Type. Can be '4', '5', 'HTTP' or 'TOR'",
        '_socks2addr': 'SOCKS Server IP Address.',
        '_socks3port': 'SOCKS Server TCP Port. Usually 1080 for 4/5, 8080 for HTTP and 9050 for TOR.',
        '_socks4user': '******',
        '_socks5pwd': "SOCKS Password. Valid only for SOCKS5 servers.",
        '_torctlport': "The port TOR is taking control commands on. This is necessary for SpiderFoot to tell TOR to re-circuit when it suspects anonymity is compromised.",
        '_modulesenabled': "Modules enabled for the scan."  # This is a hack to get a description for an option not actually available.
    }

    # Legacy way to run the server
    args = None
    p = argparse.ArgumentParser(
        description='SpiderFoot 3.3-DEV: Open Source Intelligence Automation.')
    p.add_argument("-d", "--debug", action='store_true',
                   help="Enable debug output.")
    p.add_argument("-l", metavar="IP:port", help="IP and port to listen on.")
    p.add_argument("-m", metavar="mod1,mod2,...", type=str,
                   help="Modules to enable.")
    p.add_argument("-M", "--modules", action='store_true',
                   help="List available modules.")
    p.add_argument("-s", metavar="TARGET", help="Target for the scan.")
    p.add_argument(
        "-t",
        metavar="type1,type2,...",
        type=str,
        help="Event types to collect (modules selected automatically).")
    p.add_argument("-T", "--types", action='store_true',
                   help="List available event types.")
    p.add_argument(
        "-o",
        metavar="tab|csv|json",
        type=str,
        help="Output format. Tab is default. If using json, -q is enforced.")
    p.add_argument("-H", action='store_true',
                   help="Don't print field headers, just data.")
    p.add_argument("-n", action='store_true', help="Strip newlines from data.")
    p.add_argument("-r", action='store_true',
                   help="Include the source data field in tab/csv output.")
    p.add_argument(
        "-S",
        metavar="LENGTH",
        type=int,
        help="Maximum data length to display. By default, all data is shown.")
    p.add_argument("-D", metavar='DELIMITER', type=str,
                   help="Delimiter to use for CSV output. Default is ,.")
    p.add_argument(
        "-f",
        action='store_true',
        help="Filter out other event types that weren't requested with -t.")
    p.add_argument("-F", metavar="type1,type2,...", type=str,
                   help="Show only a set of event types, comma-separated.")
    p.add_argument(
        "-x",
        action='store_true',
        help=
        "STRICT MODE. Will only enable modules that can directly consume your target, and if -t was specified only those events will be consumed by modules. This overrides -t and -m options."
    )
    p.add_argument("-q", action='store_true',
                   help="Disable logging. This will also hide errors!")
    args = p.parse_args()

    if args.debug:
        sfConfig['_debug'] = True
        log.setLevel(logging.DEBUG)
    else:
        log.setLevel(logging.INFO)
        sfConfig['_debug'] = False

    # json output is machine-readable, so logging would corrupt it.
    if args.q or args.o == "json":
        log.setLevel(logging.NOTSET)
        sfConfig['__logging'] = False

    sfModules = dict()
    sft = SpiderFoot(sfConfig)

    # Load each module in the modules directory with a .py extension
    mod_dir = sft.myPath() + '/modules/'
    if not os.path.isdir(mod_dir):
        log.critical(f"Modules directory does not exist: {mod_dir}")
        sys.exit(-1)

    for filename in os.listdir(mod_dir):
        if filename.startswith("sfp_") and filename.endswith(".py"):
            # Skip the module template and debugging modules
            if filename in ('sfp_template.py', 'sfp_stor_print.py'):
                continue
            modName = filename.split('.')[0]

            # Load and instantiate the module
            sfModules[modName] = dict()
            mod = __import__('modules.' + modName, globals(), locals(), [modName])
            sfModules[modName]['object'] = getattr(mod, modName)()
            # Pull descriptive metadata from the module's .meta dict; any
            # missing mandatory key aborts start-up below.
            try:
                sfModules[modName]['name'] = sfModules[modName]['object'].meta[
                    'name']
                sfModules[modName]['cats'] = sfModules[modName][
                    'object'].meta.get('categories', list())
                sfModules[modName]['group'] = sfModules[modName][
                    'object'].meta.get('useCases', list())
                if len(sfModules[modName]['cats']) > 1:
                    raise ValueError(
                        f"Module {modName} has multiple categories defined but only one is supported."
                    )
                sfModules[modName]['labels'] = sfModules[modName][
                    'object'].meta.get('flags', list())
                sfModules[modName]['descr'] = sfModules[modName][
                    'object'].meta['summary']
                sfModules[modName]['provides'] = sfModules[modName][
                    'object'].producedEvents()
                sfModules[modName]['consumes'] = sfModules[modName][
                    'object'].watchedEvents()
                sfModules[modName]['meta'] = sfModules[modName]['object'].meta

                if hasattr(sfModules[modName]['object'], 'opts'):
                    sfModules[modName]['opts'] = sfModules[modName][
                        'object'].opts

                if hasattr(sfModules[modName]['object'], 'optdescs'):
                    sfModules[modName]['optdescs'] = sfModules[modName][
                        'object'].optdescs
            except BaseException as e:
                log.critical(f"Failed to load {modName}: {e}")
                sys.exit(-1)

    if not sfModules:
        log.critical(f"No modules found in modules directory: {mod_dir}")
        sys.exit(-1)

    # Add module info to sfConfig so it can be used by the UI
    sfConfig['__modules__'] = sfModules
    # Add descriptions of the global config options
    sfConfig['__globaloptdescs__'] = sfOptdescs

    if args.l:
        try:
            (host, port) = args.l.split(":")
        except BaseException:
            log.critical("Invalid ip:port format.")
            sys.exit(-1)
        sfWebUiConfig['host'] = host
        sfWebUiConfig['port'] = port

        start_web_server(sfWebUiConfig, sfConfig)
    else:
        start_scan(sfConfig, sfModules, args)
def start_scan(sfConfig, sfModules, args):
    """Run a scan from the command line (non-web mode).

    Prints the module/type listings when requested, validates the CLI
    argument combinations, resolves the module list from the requested
    types/modules, configures the stdout storage module, spawns the scan
    in a child process and polls the database until the scan reaches a
    terminal status.

    Args:
        sfConfig (dict): global SpiderFoot configuration options
        sfModules (dict): loaded module metadata, keyed by module name
        args: parsed command-line arguments (argparse namespace)
    """
    # Shared with the SIGINT handler (handle_abort) so an in-flight scan
    # can be located and aborted.
    global dbh
    global scanId

    dbh = SpiderFootDb(sfConfig, init=True)
    sf = SpiderFoot(sfConfig)

    # Informational listing: available modules, then exit.
    if args.modules:
        log.info("Modules available:")
        for m in sorted(sfModules.keys()):
            if "__" in m:
                continue
            print(('{0:25} {1}'.format(m, sfModules[m]['descr'])))
        sys.exit(0)

    # Informational listing: available event types, then exit.
    if args.types:
        log.info("Types available:")
        typedata = dbh.eventTypes()
        types = dict()
        for r in typedata:
            types[r[1]] = r[0]
        for t in sorted(types.keys()):
            print(('{0:45} {1}'.format(t, types[t])))
        sys.exit(0)

    # Validate argument combinations before doing any work.
    if not args.s:
        log.error(
            "You must specify a target when running in scan mode. Try --help for guidance."
        )
        sys.exit(-1)

    if args.x and not args.t:
        log.error("-x can only be used with -t. Use --help for guidance.")
        sys.exit(-1)

    if args.x and args.m:
        log.error(
            "-x can only be used with -t and not with -m. Use --help for guidance."
        )
        sys.exit(-1)

    if args.r and (args.o and args.o not in ["tab", "csv"]):
        log.error("-r can only be used when your output format is tab or csv.")
        sys.exit(-1)

    if args.H and (args.o and args.o not in ["tab", "csv"]):
        log.error("-H can only be used when your output format is tab or csv.")
        sys.exit(-1)

    if args.D and args.o != "csv":
        log.error("-D can only be used when using the csv output format.")
        sys.exit(-1)

    target = args.s
    # Usernames and names - quoted on the commandline - won't have quotes,
    # so add them.
    if " " in target:
        target = f"\"{target}\""

    if "." not in target and not target.startswith("+") and '"' not in target:
        target = f"\"{target}\""

    targetType = sf.targetType(target)
    if not targetType:
        log.error(f"Could not determine target type. Invalid target: {target}")
        sys.exit(-1)

    # Quotes only mattered for target-type detection; strip them again.
    target = target.strip('"')

    modlist = list()

    # No explicit modules/types selected: enable everything except the
    # internal (double-underscore) modules.
    if not args.t and not args.m:
        log.warning(
            "You didn't specify any modules or types, so all will be enabled.")
        for m in list(sfModules.keys()):
            if "__" in m:
                continue
            modlist.append(m)

    signal.signal(signal.SIGINT, handle_abort)

    # If the user is scanning by type..
    # 1. Find modules producing that type
    if args.t:
        # NOTE(review): args.t is a comma-separated string, not a list --
        # _requested below uses args.t.split(","); confirm modulesProducing
        # accepts the unsplit string here.
        types = args.t
        modlist = sf.modulesProducing(types)
        newmods = deepcopy(modlist)
        newmodcpy = deepcopy(newmods)

        # 2. For each type those modules consume, get modules producing
        # (transitive closure: keep expanding until no new modules appear).
        while len(newmodcpy) > 0:
            for etype in sf.eventsToModules(newmodcpy):
                xmods = sf.modulesProducing([etype])
                for mod in xmods:
                    if mod not in modlist:
                        modlist.append(mod)
                        newmods.append(mod)
            newmodcpy = deepcopy(newmods)
            newmods = list()

    # Easier if scanning by module
    if args.m:
        modlist = list(filter(None, args.m.split(",")))

    # Add sfp__stor_stdout to the module list
    typedata = dbh.eventTypes()
    types = dict()
    for r in typedata:
        types[r[1]] = r[0]

    sfp__stor_stdout_opts = sfConfig['__modules__']['sfp__stor_stdout']['opts']
    sfp__stor_stdout_opts['_eventtypes'] = types

    if args.f:
        if args.f and not args.t:
            log.error("You can only use -f with -t. Use --help for guidance.")
            sys.exit(-1)
        sfp__stor_stdout_opts['_showonlyrequested'] = True

    if args.F:
        sfp__stor_stdout_opts['_requested'] = args.F.split(",")
        sfp__stor_stdout_opts['_showonlyrequested'] = True

    if args.o:
        sfp__stor_stdout_opts['_format'] = args.o

    if args.t:
        sfp__stor_stdout_opts['_requested'] = args.t.split(",")

    if args.n:
        sfp__stor_stdout_opts['_stripnewline'] = True

    if args.r:
        sfp__stor_stdout_opts['_showsource'] = True

    if args.S:
        sfp__stor_stdout_opts['_maxlength'] = args.S

    if args.D:
        sfp__stor_stdout_opts['_csvdelim'] = args.D

    # STRICT MODE (-x): restrict to modules that directly consume the
    # target type AND produce one of the requested types.
    if args.x:
        tmodlist = list()
        modlist = list()
        xmods = sf.modulesConsuming([targetType])
        for mod in xmods:
            if mod not in modlist:
                tmodlist.append(mod)

        # Remove any modules not producing the type requested
        rtypes = args.t.split(",")
        for mod in tmodlist:
            for r in rtypes:
                if not sfModules[mod]['provides']:
                    continue
                if r in sfModules[mod].get('provides', []) and mod not in modlist:
                    modlist.append(mod)

    if len(modlist) == 0:
        log.error("Based on your criteria, no modules were enabled.")
        sys.exit(-1)

    # Storage modules are always enabled regardless of selection.
    modlist += ["sfp__stor_db", "sfp__stor_stdout"]

    # Run the scan
    if sfConfig['__logging']:
        log.info(f"Modules enabled ({len(modlist)}): {','.join(modlist)}")

    cfg = sf.configUnserialize(dbh.configGet(), sfConfig)

    # Debug mode is a variable that gets stored to the DB, so re-apply it
    if args.debug:
        cfg['_debug'] = True
    else:
        cfg['_debug'] = False

    # If strict mode is enabled, filter the output from modules.
    if args.x and args.t:
        cfg['__outputfilter'] = args.t.split(",")

    # Prepare scan output headers
    if args.o == "json":
        # Opening bracket of the JSON array; "]" is printed at completion.
        print("[", end='')
    elif not args.H:
        delim = "\t"

        if args.o == "tab":
            delim = "\t"

        if args.o == "csv":
            if args.D:
                delim = args.D
            else:
                delim = ","

        if args.r:
            if delim == "\t":
                headers = '{0:30}{1}{2:45}{3}{4}{5}{6}'.format(
                    "Source", delim, "Type", delim, "Source Data", delim, "Data")
            else:
                headers = delim.join(["Source", "Type", "Source Data", "Data"])
        else:
            if delim == "\t":
                headers = '{0:30}{1}{2:45}{3}{4}'.format(
                    "Source", delim, "Type", delim, "Data")
            else:
                headers = delim.join(["Source", "Type", "Data"])

        print(headers)

    # Start running a new scan
    scanName = target
    scanId = sf.genScanInstanceId()
    try:
        # Run the scanner in a separate process so SIGINT can abort it
        # cleanly via handle_abort.
        p = mp.Process(target=SpiderFootScanner,
                       args=(scanName, scanId, target, targetType, modlist,
                             cfg))
        p.daemon = True
        p.start()
    except BaseException as e:
        log.error(f"Scan [{scanId}] failed: {e}")
        sys.exit(-1)

    # Poll for scan status until completion
    while True:
        time.sleep(1)
        info = dbh.scanInstanceGet(scanId)
        if not info:
            # Scan instance may not be registered in the DB yet.
            continue
        # info[5] is the scan status column.
        if info[5] in [
                "ERROR-FAILED", "ABORT-REQUESTED", "ABORTED", "FINISHED"
        ]:
            if sfConfig['__logging']:
                log.info(f"Scan completed with status {info[5]}")
            if args.o == "json":
                print("]")
            sys.exit(0)

    return
def startscan(self, scanname, scantarget, modulelist, typelist, usecase):
    """Initiate a new scan from the web UI form.

    Exactly one of modulelist, typelist or usecase must be non-empty;
    the module list is resolved from it, the scan is started in a
    background thread and the scan-info page is rendered once the scan
    has registered itself in globalScanStatus.

    Args:
        scanname: user-supplied name for the scan
        scantarget: the target to scan (IP, subnet, domain or host name)
        modulelist: comma-separated 'module_'-prefixed module selections
        typelist: comma-separated 'type_'-prefixed event type selections
        usecase: a use-case/category name, or 'all'

    Returns:
        Rendered scan-info page, or an error page on invalid input.
    """
    global globalScanStatus

    # Snapshot the current configuration to be used by the scan
    cfg = deepcopy(self.config)
    modopts = dict()  # Not used yet as module options are set globally
    modlist = list()
    sf = SpiderFoot(cfg)
    dbh = SpiderFootDb(cfg)
    types = dbh.eventTypes()
    targetType = None
    [scanname, scantarget] = self.cleanUserInput([scanname, scantarget])

    if scanname == "" or scantarget == "":
        return self.error("Form incomplete.")

    if typelist == "" and modulelist == "" and usecase == "":
        return self.error("Form incomplete.")

    # User selected modules
    if modulelist != "":
        modlist = modulelist.replace('module_', '').split(',')

    # User selected types
    if len(modlist) == 0 and typelist != "":
        typesx = typelist.replace('type_', '').split(',')

        # 1. Find all modules that produce the requested types
        modlist = sf.modulesProducing(typesx)
        newmods = deepcopy(modlist)
        newmodcpy = deepcopy(newmods)

        # 2. For each type those modules consume, get modules producing
        # (transitive closure: keep expanding until no new modules appear)
        while len(newmodcpy) > 0:
            for etype in sf.eventsToModules(newmodcpy):
                xmods = sf.modulesProducing([etype])
                for mod in xmods:
                    if mod not in modlist:
                        modlist.append(mod)
                        newmods.append(mod)
            newmodcpy = deepcopy(newmods)
            newmods = list()

    # User selected a use case
    if len(modlist) == 0 and usecase != "":
        for mod in self.config['__modules__']:
            if usecase == 'all' or usecase in self.config['__modules__'][mod]['cats']:
                modlist.append(mod)

    # Add our mandatory storage module..
    if "sfp__stor_db" not in modlist:
        modlist.append("sfp__stor_db")
    modlist.sort()

    targetType = sf.targetType(scantarget)
    if targetType is None:
        return self.error("Invalid target type. Could not recognize it as " +
                          "an IP address, IP subnet, domain name or host name.")

    # Start running a new scan
    scanId = sf.genScanInstanceGUID(scanname)
    t = SpiderFootScanner(scanname, scantarget.lower(), targetType, scanId,
                          modlist, cfg, modopts)
    t.start()

    # Wait until the scan has initialized
    while globalScanStatus.getStatus(scanId) is None:
        # Fixed: was a Python 2 print statement; the call form works on
        # both Python 2 and 3.
        print("[info] Waiting for the scan to initialize...")
        time.sleep(1)

    templ = Template(filename='dyn/scaninfo.tmpl', lookup=self.lookup)
    return templ.render(id=scanId, name=scanname, docroot=self.docroot,
                        status=globalScanStatus.getStatus(scanId),
                        pageid="SCANLIST")
def test_init(self):
    """
    Test __init__(self, options, handle=None):
    """
    instance = SpiderFoot(self.default_options)
    self.assertIsInstance(instance, SpiderFoot)
def test_init_no_options(self):
    """
    Test __init__(self, options, handle=None):
    """
    empty_opts = dict()
    instance = SpiderFoot(empty_opts)
    self.assertIsInstance(instance, SpiderFoot)
class SpiderFootDb: sf = None dbh = None conn = None # Queries for creating the SpiderFoot database createQueries = [ "PRAGMA journal_mode=WAL", "CREATE TABLE tbl_event_types ( \ event VARCHAR NOT NULL PRIMARY KEY, \ event_descr VARCHAR NOT NULL, \ event_raw INT NOT NULL DEFAULT 0, \ event_type VARCHAR NOT NULL \ )", "CREATE TABLE tbl_config ( \ scope VARCHAR NOT NULL, \ opt VARCHAR NOT NULL, \ val VARCHAR NOT NULL, \ PRIMARY KEY (scope, opt) \ )", "CREATE TABLE tbl_scan_instance ( \ guid VARCHAR NOT NULL PRIMARY KEY, \ name VARCHAR NOT NULL, \ seed_target VARCHAR NOT NULL, \ created INT DEFAULT 0, \ started INT DEFAULT 0, \ ended INT DEFAULT 0, \ status VARCHAR NOT NULL \ )", "CREATE TABLE tbl_scan_log ( \ scan_instance_id VARCHAR NOT NULL REFERENCES tbl_scan_instance(guid), \ generated INT NOT NULL, \ component VARCHAR, \ type VARCHAR NOT NULL, \ message VARCHAR \ )", "CREATE TABLE tbl_scan_config ( \ scan_instance_id VARCHAR NOT NULL REFERENCES tbl_scan_instance(guid), \ component VARCHAR NOT NULL, \ opt VARCHAR NOT NULL, \ val VARCHAR NOT NULL \ )", "CREATE TABLE tbl_scan_results ( \ scan_instance_id VARCHAR NOT NULL REFERENCES tbl_scan_instance(guid), \ hash VARCHAR NOT NULL, \ type VARCHAR NOT NULL REFERENCES tbl_event_types(event), \ generated INT NOT NULL, \ confidence INT NOT NULL DEFAULT 100, \ visibility INT NOT NULL DEFAULT 100, \ risk INT NOT NULL DEFAULT 0, \ module VARCHAR NOT NULL, \ data VARCHAR, \ false_positive INT NOT NULL DEFAULT 0, \ source_event_hash VARCHAR DEFAULT 'ROOT' \ )", "CREATE INDEX idx_scan_results_id ON tbl_scan_results (scan_instance_id)", "CREATE INDEX idx_scan_results_type ON tbl_scan_results (scan_instance_id, type)", "CREATE INDEX idx_scan_results_hash ON tbl_scan_results (scan_instance_id, hash)", "CREATE INDEX idx_scan_results_srchash ON tbl_scan_results (scan_instance_id, source_event_hash)", "CREATE INDEX idx_scan_logs ON tbl_scan_log (scan_instance_id)", "INSERT INTO tbl_event_types (event, event_descr, event_raw, 
event_type) VALUES ('ROOT', 'Internal SpiderFoot Root event', 1, 'INTERNAL')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('ACCOUNT_EXTERNAL_OWNED', 'Account on External Site', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('ACCOUNT_EXTERNAL_OWNED_COMPROMISED', 'Hacked Account on External Site', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('ACCOUNT_EXTERNAL_USER_SHARED', 'User Account on External Site', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('ACCOUNT_EXTERNAL_USER_SHARED_COMPROMISED', 'Hacked User Account on External Site', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AFFILIATE_INTERNET_NAME', 'Affiliate - Internet Name', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AFFILIATE_IPADDR', 'Affiliate - IP Address', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AFFILIATE_WEB_CONTENT', 'Affiliate - Web Content', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AFFILIATE_DESCRIPTION_CATEGORY', 'Affiliate Description - Category', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AFFILIATE_DESCRIPTION_ABSTRACT', 'Affiliate Description - Abstract', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('APPSTORE_ENTRY', 'App Store Entry', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('AMAZON_S3_BUCKET', 'Amazon S3 Bucket', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BASE64_DATA', 'Base64-encoded Data', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, 
event_type) VALUES ('BITCOIN_ADDRESS', 'Bitcoin Address', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BITCOIN_BALANCE', 'Bitcoin Balance', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BGP_AS_OWNER', 'BGP AS Ownership', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BGP_AS_MEMBER', 'BGP AS Membership', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BGP_AS_PEER', 'BGP AS Peer', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BLACKLISTED_IPADDR', 'Blacklisted IP Address', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BLACKLISTED_AFFILIATE_IPADDR', 'Blacklisted Affiliate IP Address', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BLACKLISTED_SUBNET', 'Blacklisted IP on Same Subnet', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('BLACKLISTED_NETBLOCK', 'Blacklisted IP on Owned Netblock', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('CO_HOSTED_SITE', 'Co-Hosted Site', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DARKNET_MENTION_URL', 'Darknet Mention URL', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DARKNET_MENTION_CONTENT', 'Darknet Mention Web Content', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DEFACED_INTERNET_NAME', 'Defaced', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DEFACED_IPADDR', 'Defaced IP Address', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, 
event_raw, event_type) VALUES ('DEFACED_AFFILIATE_INTERNET_NAME', 'Defaced Affiliate', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DEFACED_COHOST', 'Defaced Co-Hosted Site', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DEFACED_AFFILIATE_IPADDR', 'Defaced Affiliate IP Address', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DESCRIPTION_CATEGORY', 'Description - Category', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DESCRIPTION_ABSTRACT', 'Description - Abstract', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DEVICE_TYPE', 'Device Type', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DNS_TEXT', 'DNS TXT Record', 0, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DNS_SPF', 'DNS SPF Record', 0, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DOMAIN_NAME', 'Domain Name', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DOMAIN_NAME_PARENT', 'Domain Name (Parent)', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DOMAIN_REGISTRAR', 'Domain Registrar', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('DOMAIN_WHOIS', 'Domain Whois', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('EMAILADDR', 'Email Address', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('EMAILADDR_COMPROMISED', 'Hacked Email Address', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('ERROR_MESSAGE', 'Error 
Message', 0, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('GEOINFO', 'Physical Location', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('HTTP_CODE', 'HTTP Status Code', 0, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('HUMAN_NAME', 'Human Name', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('INTERESTING_FILE', 'Interesting File', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('INTERESTING_FILE_HISTORIC', 'Historic Interesting File', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('JUNK_FILE', 'Junk File', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('INTERNET_NAME', 'Internet Name', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('IP_ADDRESS', 'IP Address', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('IPV6_ADDRESS', 'IPv6 Address', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('LINKED_URL_INTERNAL', 'Linked URL - Internal', 0, 'SUBENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('LINKED_URL_EXTERNAL', 'Linked URL - External', 0, 'SUBENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_ASN', 'Malicious AS', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_IPADDR', 'Malicious IP Address', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_COHOST', 'Malicious Co-Hosted Site', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, 
event_type) VALUES ('MALICIOUS_EMAILADDR', 'Malicious E-mail Address', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_INTERNET_NAME', 'Malicious Internet Name', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_AFFILIATE_INTERNET_NAME', 'Malicious Affiliate', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_AFFILIATE_IPADDR', 'Malicious Affiliate IP Address', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_NETBLOCK', 'Malicious IP on Owned Netblock', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('MALICIOUS_SUBNET', 'Malicious IP on Same Subnet', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('NETBLOCK_OWNER', 'Netblock Ownership', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('NETBLOCK_MEMBER', 'Netblock Membership', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('NETBLOCK_WHOIS', 'Netblock Whois', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('OPERATING_SYSTEM', 'Operating System', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('LEAKSITE_URL', 'Leak Site URL', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('LEAKSITE_CONTENT', 'Leak Site Content', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PHONE_NUMBER', 'Phone Number', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PHYSICAL_ADDRESS', 'Physical Address', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, 
event_raw, event_type) VALUES ('PHYSICAL_COORDINATES', 'Physical Coordinates', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PGP_KEY', 'PGP Public Key', 0, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PROVIDER_DNS', 'Name Server (DNS ''NS'' Records)', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PROVIDER_JAVASCRIPT', 'Externally Hosted Javascript', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PROVIDER_MAIL', 'Email Gateway (DNS ''MX'' Records)', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PROVIDER_HOSTING', 'Hosting Provider', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('PUBLIC_CODE_REPO', 'Public Code Repository', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('RAW_RIR_DATA', 'Raw Data from RIRs', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('RAW_DNS_RECORDS', 'Raw DNS Records', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('RAW_FILE_META_DATA', 'Raw File Meta Data', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SEARCH_ENGINE_WEB_CONTENT', 'Search Engine''s Web Content', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SOCIAL_MEDIA', 'Social Media Presence', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SIMILARDOMAIN', 'Similar Domain', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SOFTWARE_USED', 'Software Used', 0, 'SUBENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES 
('SSL_CERTIFICATE_RAW', 'SSL Certificate - Raw Data', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_ISSUED', 'SSL Certificate - Issued to', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_ISSUER', 'SSL Certificate - Issued by', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_MISMATCH', 'SSL Certificate Host Mismatch', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_EXPIRED', 'SSL Certificate Expired', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('SSL_CERTIFICATE_EXPIRING', 'SSL Certificate Expiring', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('TARGET_WEB_CONTENT', 'Web Content', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('TARGET_WEB_COOKIE', 'Cookies', 0, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('TCP_PORT_OPEN', 'Open TCP Port', 0, 'SUBENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('TCP_PORT_OPEN_BANNER', 'Open TCP Port Banner', 0, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('UDP_PORT_OPEN', 'Open UDP Port', 0, 'SUBENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('UDP_PORT_OPEN_INFO', 'Open UDP Port Information', 0, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_ADBLOCKED_EXTERNAL', 'URL (AdBlocked External)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_ADBLOCKED_INTERNAL', 'URL (AdBlocked Internal)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, 
event_descr, event_raw, event_type) VALUES ('URL_FORM', 'URL (Form)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_FLASH', 'URL (Uses Flash)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_JAVASCRIPT', 'URL (Uses Javascript)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_WEB_FRAMEWORK', 'URL (Uses a Web Framework)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_JAVA_APPLET', 'URL (Uses Java Applet)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_STATIC', 'URL (Purely Static)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_PASSWORD', 'URL (Accepts Passwords)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_UPLOAD', 'URL (Accepts Uploads)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_FORM_HISTORIC', 'Historic URL (Form)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_FLASH_HISTORIC', 'Historic URL (Uses Flash)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_JAVASCRIPT_HISTORIC', 'Historic URL (Uses Javascript)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_WEB_FRAMEWORK_HISTORIC', 'Historic URL (Uses a Web Framework)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_JAVA_APPLET_HISTORIC', 'Historic URL (Uses Java Applet)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_STATIC_HISTORIC', 'Historic URL (Purely 
Static)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_PASSWORD_HISTORIC', 'Historic URL (Accepts Passwords)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('URL_UPLOAD_HISTORIC', 'Historic URL (Accepts Uploads)', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('USERNAME', 'Username', 0, 'ENTITY')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('VULNERABILITY', 'Vulnerability in Public Domain', 0, 'DESCRIPTOR')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('WEBSERVER_BANNER', 'Web Server', 0, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('WEBSERVER_HTTPHEADERS', 'HTTP Headers', 1, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('WEBSERVER_STRANGEHEADER', 'Non-Standard HTTP Header', 0, 'DATA')", "INSERT INTO tbl_event_types (event, event_descr, event_raw, event_type) VALUES ('WEBSERVER_TECHNOLOGY', 'Web Technology', 0, 'DESCRIPTOR')" ] def __init__(self, opts): self.sf = SpiderFoot(opts) # connect() will create the database file if it doesn't exist, but # at least we can use this opportunity to ensure we have permissions to # read and write to such a file. dbh = sqlite3.connect(self.sf.myPath() + "/" + opts['__database'], timeout=10) if dbh is None: self.sf.fatal("Could not connect to internal database, and couldn't create " + opts['__database']) dbh.text_factory = str self.conn = dbh self.dbh = dbh.cursor() # Now we actually check to ensure the database file has the schema set # up correctly. try: self.dbh.execute('SELECT COUNT(*) FROM tbl_scan_config') self.conn.create_function("REGEXP", 2, __dbregex__) except sqlite3.Error: # .. If not set up, we set it up. 
try: self.create() except BaseException as e: self.sf.error("Tried to set up the SpiderFoot database schema, but failed: " + e.args[0]) return # # Back-end database operations # # Create the back-end schema def create(self): try: for qry in self.createQueries: self.dbh.execute(qry) self.conn.commit() except sqlite3.Error as e: raise BaseException("SQL error encountered when setting up database: " + e.args[0]) # Close the database handle def close(self): self.dbh.close() # Search results # criteria is search criteria such as: # - scan_id (search within a scan, if omitted search all) # - type (search a specific type, if omitted search all) # - value (search values for a specific string, if omitted search all) # - regex (search values for a regular expression) # ** at least two criteria must be set ** def search(self, criteria, filterFp=False): if criteria.values().count(None) == 3: return False qvars = list() qry = "SELECT ROUND(c.generated) AS generated, c.data, \ s.data as 'source_data', \ c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \ c.source_event_hash, t.event_descr, t.event_type, c.scan_instance_id, \ c.false_positive as 'fp', s.false_positive as 'parent_fp' \ FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \ WHERE s.scan_instance_id = c.scan_instance_id AND \ t.event = c.type AND c.source_event_hash = s.hash " if filterFp: qry += " AND c.false_positive <> 1 " if criteria.get('scan_id') is not None: qry += "AND c.scan_instance_id = ? " qvars.append(criteria['scan_id']) if criteria.get('type') is not None: qry += " AND c.type = ? " qvars.append(criteria['type']) if criteria.get('value') is not None: qry += " AND (c.data LIKE ? OR s.data LIKE ?) " qvars.append(criteria['value']) qvars.append(criteria['value']) if criteria.get('regex') is not None: qry += " AND (c.data REGEXP ? OR s.data REGEXP ?) 
"
            qvars.append(criteria['regex'])
            qvars.append(criteria['regex'])

        qry += " ORDER BY c.data"

        try:
            #print qry
            #print str(qvars)
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching search results: " + e.args[0])

    # Get event types
    def eventTypes(self):
        """Return all rows of tbl_event_types as
        (event_descr, event, event_raw, event_type) tuples."""
        qry = "SELECT event_descr, event, event_raw, event_type FROM tbl_event_types"
        try:
            self.dbh.execute(qry)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            # NOTE(review): returns None if self.sf.error() does not raise.
            self.sf.error("SQL error encountered when retreiving event types:" + e.args[0])

    # Log an event to the database
    def scanLogEvent(self, instanceId, classification, message, component=None):
        """Insert one log row for a scan.

        instanceId: scan GUID; classification: log level/type string;
        message: log text; component: originating module, defaults to
        "SpiderFoot". The 'generated' timestamp is stored in milliseconds.
        Returns True on success; aborts via self.sf.fatal() on SQL error.
        """
        if component is None:
            component = "SpiderFoot"

        qry = "INSERT INTO tbl_scan_log \
            (scan_instance_id, generated, component, type, message) \
            VALUES (?, ?, ?, ?, ?)"
        try:
            self.dbh.execute(qry, (
                instanceId, time.time() * 1000, component, classification, message
            ))
            self.conn.commit()
        except sqlite3.Error as e:
            if "locked" in e.args[0]:
                # TODO: Do something smarter here to handle locked databases
                self.sf.fatal("Unable to log event in DB due to lock: " + e.args[0])
            else:
                self.sf.fatal("Unable to log event in DB: " + e.args[0])

        return True

    # Store a scan instance
    def scanInstanceCreate(self, instanceId, scanName, scanTarget):
        """Create a scan instance row with status 'CREATED'.
        'created' is stored in milliseconds since the epoch. Returns True."""
        qry = "INSERT INTO tbl_scan_instance \
            (guid, name, seed_target, created, status) \
            VALUES (?, ?, ?, ?, ?)"
        try:
            self.dbh.execute(qry, (
                instanceId, scanName, scanTarget, time.time() * 1000, 'CREATED'
            ))
            self.conn.commit()
        except sqlite3.Error as e:
            self.sf.fatal("Unable to create instance in DB: " + e.args[0])

        return True

    # Update the start time, end time or status (or all 3) of a scan instance
    def scanInstanceSet(self, instanceId, started=None, ended=None, status=None):
        """Update only the fields supplied (started/ended/status) of the
        scan instance identified by instanceId."""
        qvars = list()
        qry = "UPDATE tbl_scan_instance SET "

        if started is not None:
            qry += " started = ?,"
            qvars.append(started)

        if ended is not None:
            qry += " ended = ?,"
            qvars.append(ended)

        if status is not None:
            qry += " status = ?,"
            qvars.append(status)

        # guid = guid is a little hack to avoid messing with , placement above
        qry += " guid = guid WHERE guid = ?"
        qvars.append(instanceId)

        try:
            self.dbh.execute(qry, qvars)
            self.conn.commit()
        except sqlite3.Error:
            self.sf.fatal("Unable to set information for the scan instance.")

    # Return info about a scan instance (name, target, created, started,
    # ended, status) - don't need this yet - untested
    def scanInstanceGet(self, instanceId):
        """Return (name, seed_target, created, started, ended, status) for a
        scan GUID; the three timestamps are converted from ms to seconds."""
        qry = "SELECT name, seed_target, ROUND(created/1000) AS created, \
            ROUND(started/1000) AS started, ROUND(ended/1000) AS ended, status \
            FROM tbl_scan_instance WHERE guid = ?"
        qvars = [instanceId]
        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchone()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when retreiving scan instance:" + e.args[0])

    # Obtain a summary of the results per event type
    def scanResultSummary(self, instanceId, by="type"):
        """Summarise scan results grouped by "type", "module" or "entity".
        Each row: (key, event_descr, last_in, total, utotal)."""
        if by == "type":
            qry = "SELECT r.type, e.event_descr, MAX(ROUND(generated)) AS last_in, \
                count(*) AS total, count(DISTINCT r.data) as utotal FROM \
                tbl_scan_results r, tbl_event_types e WHERE e.event = r.type \
                AND r.scan_instance_id = ? GROUP BY r.type ORDER BY e.event_descr"

        if by == "module":
            qry = "SELECT r.module, '', MAX(ROUND(generated)) AS last_in, \
                count(*) AS total, count(DISTINCT r.data) as utotal FROM \
                tbl_scan_results r, tbl_event_types e WHERE e.event = r.type \
                AND r.scan_instance_id = ? GROUP BY r.module ORDER BY r.module DESC"

        if by == "entity":
            qry = "SELECT r.data, e.event_descr, MAX(ROUND(generated)) AS last_in, \
                count(*) AS total, count(DISTINCT r.data) as utotal FROM \
                tbl_scan_results r, tbl_event_types e WHERE e.event = r.type \
                AND r.scan_instance_id = ? \
                AND e.event_type in ('ENTITY') \
                GROUP BY r.data, e.event_descr ORDER BY total DESC limit 50"

        # NOTE(review): if 'by' is none of type/module/entity, qry is never
        # bound and the execute() below raises NameError, not a clean error.
        qvars = [instanceId]
        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching result summary: " + e.args[0])

    # Obtain the data for a scan and event type
    def scanResultEvent(self, instanceId, eventType='ALL', filterFp=False):
        """Return result rows for a scan joined with their source event and
        type metadata, optionally restricted to one event type and/or with
        false-positive rows excluded."""
        qry = "SELECT ROUND(c.generated) AS generated, c.data, \
            s.data as 'source_data', \
            c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \
            c.source_event_hash, t.event_descr, t.event_type, s.scan_instance_id, \
            c.false_positive as 'fp', s.false_positive as 'parent_fp' \
            FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \
            WHERE c.scan_instance_id = ? AND c.source_event_hash = s.hash AND \
            s.scan_instance_id = c.scan_instance_id AND \
            t.event = c.type"
        qvars = [instanceId]

        if eventType != "ALL":
            qry += " AND c.type = ?"
            qvars.append(eventType)

        if filterFp:
            qry += " AND c.false_positive <> 1"

        qry += " ORDER BY c.data"

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching result events: " + e.args[0])

    # Obtain a unique list of elements
    def scanResultEventUnique(self, instanceId, eventType='ALL', filterFp=False):
        """Return distinct (data, type, count) rows for a scan ordered by
        frequency; same eventType/filterFp semantics as scanResultEvent()."""
        qry = "SELECT DISTINCT data, type, COUNT(*) FROM tbl_scan_results \
            WHERE scan_instance_id = ?"
        qvars = [instanceId]

        if eventType != "ALL":
            qry += " AND type = ?"
            qvars.append(eventType)

        if filterFp:
            qry += " AND false_positive <> 1"

        qry += " GROUP BY type, data ORDER BY COUNT(*)"

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching unique result events: " + e.args[0])

    # Get scan logs
    def scanLogs(self, instanceId, limit=None, fromRowId=None, reverse=False):
        """Fetch log rows for a scan, newest first unless reverse=True;
        optionally only rows after fromRowId and/or capped at 'limit'."""
        qry = "SELECT generated AS generated, component, \
            type, message, rowid FROM tbl_scan_log WHERE scan_instance_id = ?"
        if fromRowId:
            qry += " and rowid > ?"

        qry += " ORDER BY generated "
        if reverse:
            qry += "ASC"
        else:
            qry += "DESC"
        qvars = [instanceId]

        if fromRowId:
            qvars.append(fromRowId)

        if limit is not None:
            qry += " LIMIT ?"
            qvars.append(limit)

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching scan logs: " + e.args[0])

    # Get scan errors
    def scanErrors(self, instanceId, limit=None):
        """Return ERROR-type log rows (generated, component, message) for a
        scan, newest first, optionally capped at 'limit' rows."""
        qry = "SELECT generated AS generated, component, \
            message FROM tbl_scan_log WHERE scan_instance_id = ? \
            AND type = 'ERROR' ORDER BY generated DESC"
        qvars = [instanceId]

        if limit is not None:
            qry += " LIMIT ?"
            qvars.append(limit)

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching scan errors: " + e.args[0])

    # Delete a scan instance
    def scanInstanceDelete(self, instanceId):
        """Delete a scan and all of its associated config, results and log
        rows; all four deletes are committed together."""
        qry1 = "DELETE FROM tbl_scan_instance WHERE guid = ?"
        qry2 = "DELETE FROM tbl_scan_config WHERE scan_instance_id = ?"
        qry3 = "DELETE FROM tbl_scan_results WHERE scan_instance_id = ?"
        qry4 = "DELETE FROM tbl_scan_log WHERE scan_instance_id = ?"
        qvars = [instanceId]
        try:
            self.dbh.execute(qry1, qvars)
            self.dbh.execute(qry2, qvars)
            self.dbh.execute(qry3, qvars)
            self.dbh.execute(qry4, qvars)
            self.conn.commit()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when deleting scan: " + e.args[0])

    # Set the false positive flag for a result
    def scanResultsUpdateFP(self, instanceId, resultHashes, fpFlag):
        """Set or clear the false-positive flag (fpFlag) on each result hash
        in resultHashes for the given scan. Returns False on the first SQL
        error, True once all updates are committed."""
        for resultHash in resultHashes:
            qry = "UPDATE tbl_scan_results SET false_positive = ? WHERE \
                scan_instance_id = ? AND hash = ?"
qvars = [fpFlag, instanceId, resultHash] try: self.dbh.execute(qry, qvars) except sqlite3.Error as e: self.sf.error("SQL error encountered when updating F/P: " + e.args[0], False) return False self.conn.commit() return True # Store the default configuration def configSet(self, optMap=dict()): qry = "REPLACE INTO tbl_config (scope, opt, val) VALUES (?, ?, ?)" for opt in optMap.keys(): # Module option if ":" in opt: parts = opt.split(':') qvals = [parts[0], parts[1], optMap[opt]] else: # Global option qvals = ["GLOBAL", opt, optMap[opt]] try: self.dbh.execute(qry, qvals) except sqlite3.Error as e: self.sf.error("SQL error encountered when storing config, aborting: " + e.args[0]) self.conn.commit() # Retreive the config from the database def configGet(self): qry = "SELECT scope, opt, val FROM tbl_config" try: retval = dict() self.dbh.execute(qry) for [scope, opt, val] in self.dbh.fetchall(): if scope == "GLOBAL": retval[opt] = val else: retval[scope + ":" + opt] = val return retval except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching configuration: " + e.args[0]) # Reset the config to default (clear it from the DB and let the hard-coded # settings in the code take effect.) 
def configClear(self): qry = "DELETE from tbl_config" try: self.dbh.execute(qry) self.conn.commit() except sqlite3.Error as e: self.sf.error("Unable to clear configuration from the database: " + e.args[0]) # Store a configuration value for a scan def scanConfigSet(self, id, optMap=dict()): qry = "REPLACE INTO tbl_scan_config \ (scan_instance_id, component, opt, val) VALUES (?, ?, ?, ?)" for opt in optMap.keys(): # Module option if ":" in opt: parts = opt.split(':') qvals = [id, parts[0], parts[1], optMap[opt]] else: # Global option qvals = [id, "GLOBAL", opt, optMap[opt]] try: self.dbh.execute(qry, qvals) except sqlite3.Error as e: self.sf.error("SQL error encountered when storing config, aborting: " + e.args[0]) self.conn.commit() # Retreive configuration data for a scan component def scanConfigGet(self, instanceId): qry = "SELECT component, opt, val FROM tbl_scan_config \ WHERE scan_instance_id = ? ORDER BY component, opt" qvars = [instanceId] try: retval = dict() self.dbh.execute(qry, qvars) for [component, opt, val] in self.dbh.fetchall(): if component == "GLOBAL": retval[opt] = val else: retval[component + ":" + opt] = val return retval except sqlite3.Error as e: self.sf.error("SQL error encountered when fetching configuration: " + e.args[0]) # Store an event # eventData is a SpiderFootEvent object with the following variables: # - eventType: the event, e.g. URL_FORM, RAW_DATA, etc. # - generated: time the event occurred # - confidence: how sure are we of this data's validity, 0-100 # - visibility: how 'visible' was this data, 0-100 # - risk: how much risk does this data represent, 0-100 # - module: module that generated the event # - data: the actual data, i.e. a URL, port number, webpage content, etc. # - sourceEventHash: hash of the event that triggered this event # And getHash() will return the event hash. 
    def scanEventStore(self, instanceId, sfEvent, truncateSize=0):
        """Store a SpiderFootEvent in tbl_scan_results for the given scan.

        sfEvent.data is coerced to unicode (Python 2) before storage;
        truncateSize > 0 caps the stored data length. Aborts via
        self.sf.fatal() if the event has no source event hash.
        """
        storeData = ''

        if type(sfEvent.data) is not unicode:
            # If sfEvent.data is a dict or list, convert it to a string first, as
            # those types do not have a unicode converter.
            if type(sfEvent.data) is str:
                storeData = unicode(sfEvent.data, 'utf-8', errors='replace')
            else:
                try:
                    storeData = unicode(str(sfEvent.data), 'utf-8', errors='replace')
                except BaseException as e:
                    self.sf.fatal("Unhandled type detected: " + str(type(sfEvent.data)))
        else:
            storeData = sfEvent.data

        if truncateSize > 0:
            storeData = storeData[0:truncateSize]

        if sfEvent.sourceEventHash in ["", None]:
            self.sf.fatal("UNABLE TO CREATE RECORD WITH EMPTY SOURCE EVENT HASH!")

        qry = "INSERT INTO tbl_scan_results \
            (scan_instance_id, hash, type, generated, confidence, \
            visibility, risk, module, data, source_event_hash) \
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
        qvals = [instanceId, sfEvent.getHash(), sfEvent.eventType, sfEvent.generated,
                 sfEvent.confidence, sfEvent.visibility, sfEvent.risk,
                 sfEvent.module, storeData, sfEvent.sourceEventHash]

        #print "STORING: " + str(qvals)

        try:
            self.dbh.execute(qry, qvals)
            self.conn.commit()
            return None
        except sqlite3.Error as e:
            self.sf.fatal("SQL error encountered when storing event data (" + str(self.dbh) + ": " + e.args[0])

    # List of all previously run scans
    def scanInstanceList(self):
        """Return one row per scan with its result count (ROOT rows excluded),
        newest-started first."""
        # SQLite doesn't support OUTER JOINs, so we need a work-around that
        # does a UNION of scans with results and scans without results to
        # get a complete listing.
        qry = "SELECT i.guid, i.name, i.seed_target, ROUND(i.created/1000), \
            ROUND(i.started)/1000 as started, ROUND(i.ended)/1000, i.status, COUNT(r.type) \
            FROM tbl_scan_instance i, tbl_scan_results r WHERE i.guid = r.scan_instance_id \
            AND r.type <> 'ROOT' GROUP BY i.guid \
            UNION ALL \
            SELECT i.guid, i.name, i.seed_target, ROUND(i.created/1000), \
            ROUND(i.started)/1000 as started, ROUND(i.ended)/1000, i.status, '0' \
            FROM tbl_scan_instance i WHERE i.guid NOT IN ( \
            SELECT distinct scan_instance_id FROM tbl_scan_results WHERE type <> 'ROOT') \
            ORDER BY started DESC"
        try:
            self.dbh.execute(qry)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching scan list: " + e.args[0])

    # History of data from the scan
    def scanResultHistory(self, instanceId):
        """Return (hour:minute weekday, type, count) buckets of when results
        were generated for a scan."""
        qry = "SELECT STRFTIME('%H:%M %w', generated, 'unixepoch') AS hourmin, \
            type, COUNT(*) FROM tbl_scan_results \
            WHERE scan_instance_id = ? GROUP BY hourmin, type"
        qvars = [instanceId]
        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when fetching scan history: " + e.args[0])

    # Get the source IDs, types and data for a set of IDs
    def scanElementSourcesDirect(self, instanceId, elementIdList):
        """Return the rows (scanResultEvent format) whose hashes are in
        elementIdList, joined with their source events."""
        # the output of this needs to be aligned with scanResultEvent,
        # as other functions call both expecting the same output.
        # NOTE(review): the hash list below is string-concatenated into the
        # SQL rather than parameterised; hashes appear to be generated
        # internally, but this should still be parameterised to be safe.
        qry = "SELECT ROUND(c.generated) AS generated, c.data, \
            s.data as 'source_data', \
            c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \
            c.source_event_hash, t.event_descr, t.event_type, s.scan_instance_id, \
            c.false_positive as 'fp', s.false_positive as 'parent_fp' \
            FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \
            WHERE c.scan_instance_id = ? AND c.source_event_hash = s.hash AND \
            s.scan_instance_id = c.scan_instance_id AND \
            t.event = c.type AND c.hash in ("
        qvars = [instanceId]

        for hashId in elementIdList:
            qry = qry + "'" + hashId + "',"
        # Trailing '' entry closes the IN (...) list left open by the loop.
        qry += "'')"

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when getting source element IDs: " + e.args[0])

    # Get the child IDs, types and data for a set of IDs
    def scanElementChildrenDirect(self, instanceId, elementIdList):
        """Return the rows (scanResultEvent format) whose SOURCE hashes are in
        elementIdList, i.e. the direct children of those elements."""
        # the output of this needs to be aligned with scanResultEvent,
        # as other functions call both expecting the same output.
        qry = "SELECT ROUND(c.generated) AS generated, c.data, \
            s.data as 'source_data', \
            c.module, c.type, c.confidence, c.visibility, c.risk, c.hash, \
            c.source_event_hash, t.event_descr, t.event_type, s.scan_instance_id, \
            c.false_positive as 'fp', s.false_positive as 'parent_fp' \
            FROM tbl_scan_results c, tbl_scan_results s, tbl_event_types t \
            WHERE c.scan_instance_id = ? AND c.source_event_hash = s.hash AND \
            s.scan_instance_id = c.scan_instance_id AND \
            t.event = c.type AND s.hash in ("
        qvars = [instanceId]

        for hashId in elementIdList:
            qry = qry + "'" + hashId + "',"
        qry += "'')"

        try:
            self.dbh.execute(qry, qvars)
            return self.dbh.fetchall()
        except sqlite3.Error as e:
            self.sf.error("SQL error encountered when getting child element IDs: " + e.args[0])

    # Get the full set of upstream IDs which are parents to the
    # supplied set of IDs.
    # Data has to be in the format of output from scanElementSourcesDirect
    # and produce output in the same format.
    def scanElementSourcesAll(self, instanceId, childData):
        """Walk upwards from childData rows (scanElementSourcesDirect format)
        to ROOT, returning [datamap, pc]: datamap maps element hash -> row,
        pc maps parent hash -> list of child hashes."""
        # Get the first round of source IDs for the leafs
        keepGoing = True
        nextIds = list()
        datamap = dict()
        pc = dict()

        for row in childData:
            # these must be unique values!
parentId = row[9] childId = row[8] datamap[childId] = row if parentId in pc: if childId not in pc[parentId]: pc[parentId].append(childId) else: pc[parentId] = [childId] # parents of the leaf set if parentId not in nextIds: nextIds.append(parentId) while keepGoing: parentSet = self.scanElementSourcesDirect(instanceId, nextIds) nextIds = list() keepGoing = False for row in parentSet: parentId = row[9] childId = row[8] datamap[childId] = row #print childId + " = " + str(row) if parentId in pc: if childId not in pc[parentId]: pc[parentId].append(childId) else: pc[parentId] = [childId] if parentId not in nextIds: nextIds.append(parentId) # Prevent us from looping at root if parentId != "ROOT": keepGoing = True datamap[parentId] = row return [datamap, pc] # Get the full set of downstream IDs which are children of the # supplied set of IDs # NOTE FOR NOW THE BEHAVIOR IS NOT THE SAME AS THE scanElementParent* # FUNCTIONS - THIS ONLY RETURNS IDS!! def scanElementChildrenAll(self, instanceId, parentIds): datamap = list() keepGoing = True nextIds = list() nextSet = self.scanElementChildrenDirect(instanceId, parentIds) for row in nextSet: datamap.append(row[8]) for row in nextSet: if row[8] not in nextIds: nextIds.append(row[8]) while keepGoing: nextSet = self.scanElementChildrenDirect(instanceId, nextIds) if nextSet == None or len(nextSet) == 0: keepGoing = False break for row in nextSet: datamap.append(row[8]) nextIds = list() nextIds.append(row[8]) return datamap