def handleEvent(self, event):
    """Scan incoming event data for phone numbers and emit a PHONE_NUMBER
    event for each number found, in E.164 format."""
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    # De-duplicate on a hash of the content rather than the content itself,
    # since event data can be large.
    sourceData = self.sf.hashstring(eventData)
    if sourceData in self.results:
        return None
    self.results.append(sourceData)

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    # Make potential phone numbers more friendly to parse
    content = eventData.replace('.', '-')

    for match in phonenumbers.PhoneNumberMatcher(content, region=None):
        number = phonenumbers.format_number(match.number,
                                            phonenumbers.PhoneNumberFormat.E164)
        evt = SpiderFootEvent("PHONE_NUMBER", number, self.__name__, event)
        # Propagate the originating event's data source, if known.
        if event.moduleDataSource:
            evt.moduleDataSource = event.moduleDataSource
        else:
            evt.moduleDataSource = "Unknown"
        self.notifyListeners(evt)

    return None
def handleEvent(self, event):
    """Scan incoming event data for e-mail addresses belonging to the
    target's domain and emit an EMAILADDR event for each unique address.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    # Don't re-parse events that are themselves e-mail addresses.
    if eventName.startswith("EMAILADDR"):
        return None

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    # Only plain text can be scanned (Python 2: str or unicode).
    if type(eventData) not in [str, unicode]:
        self.sf.debug("Unhandled type to find e-mails: " + str(type(eventData)))
        return None

    pat = re.compile("([\%a-zA-Z\.0-9_\-]+@[a-zA-Z\.0-9\-]+\.[a-zA-Z\.0-9\-]+)")
    matches = re.findall(pat, eventData)
    myres = list()
    for match in matches:
        # Anything shorter than "a@b." cannot be a real address.
        if len(match) < 4:
            self.sf.debug("Likely invalid address: " + match)
            continue

        # Handle messed up encodings
        if "%" in match:
            self.sf.debug("Skipped address: " + match)
            continue

        # Get the domain and strip potential ending .
        mailDom = match.lower().split('@')[1].strip('.')
        if not self.getTarget().matches(mailDom):
            self.sf.debug("Ignoring e-mail address on an external domain: " + match)
            continue

        self.sf.info("Found e-mail address: " + match)

        # Normalise to unicode (Python 2) and strip a trailing dot.
        if type(match) == str:
            mail = unicode(match.strip('.'), 'utf-8', errors='replace')
        else:
            mail = match.strip('.')

        # De-duplicate within this one event's data.
        if mail in myres:
            self.sf.debug("Already found from this source.")
            continue
        else:
            myres.append(mail)

        evt = SpiderFootEvent("EMAILADDR", mail, self.__name__, event)
        # Propagate the originating event's data source, if known.
        if event.moduleDataSource:
            evt.moduleDataSource = event.moduleDataSource
        else:
            evt.moduleDataSource = "Unknown"
        self.notifyListeners(evt)

    return None
def handleEvent(self, event):
    """Handle an incoming event.

    DOMAIN_NAME events trigger a host search; WEB_ANALYTICS_ID events
    trigger an HTML search for supported (Google) identifiers; IP/netblock
    events are queried per-address for host records, emitting OS, device
    type, geo, open-port, banner, software, ASN and vulnerability events.
    Results are emitted via notifyListeners(); always returns None.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    if self.errorState:
        return None

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    if self.opts['api_key'] == "":
        self.sf.error("You enabled sfp_shodan but did not set an API key!", False)
        self.errorState = True
        return None

    # Don't look up stuff twice
    if eventData in self.results:
        self.sf.debug("Skipping " + eventData + " as already mapped.")
        return None
    self.results[eventData] = True

    if eventName == "DOMAIN_NAME":
        hosts = self.searchHosts(eventData)
        if hosts is None:
            return None

        evt = SpiderFootEvent("SEARCH_ENGINE_WEB_CONTENT", str(hosts),
                              self.__name__, event)
        self.notifyListeners(evt)

    if eventName == 'WEB_ANALYTICS_ID':
        try:
            network = eventData.split(": ")[0]
            analytics_id = eventData.split(": ")[1]
        except BaseException as e:
            self.sf.error("Unable to parse WEB_ANALYTICS_ID: " +
                          eventData + " (" + str(e) + ")", False)
            return None

        # Only Google identifiers are supported by the search back-end.
        if network not in ['Google AdSense', 'Google Analytics',
                          'Google Site Verification']:
            self.sf.debug("Skipping " + eventData + ", as not supported.")
            return None

        rec = self.searchHtml(analytics_id)
        if rec is None:
            return None

        evt = SpiderFootEvent("SEARCH_ENGINE_WEB_CONTENT", str(rec),
                              self.__name__, event)
        self.notifyListeners(evt)
        return None

    if eventName == 'NETBLOCK_OWNER':
        if not self.opts['netblocklookup']:
            return None
        # A smaller prefix length means a bigger network than permitted.
        if IPNetwork(eventData).prefixlen < self.opts['maxnetblock']:
            self.sf.debug("Network size bigger than permitted: " +
                          str(IPNetwork(eventData).prefixlen) + " > " +
                          str(self.opts['maxnetblock']))
            return None

    qrylist = list()
    if eventName.startswith("NETBLOCK_"):
        # Expand the netblock and query every address within it.
        for ipaddr in IPNetwork(eventData):
            qrylist.append(str(ipaddr))
            self.results[str(ipaddr)] = True
    else:
        qrylist.append(eventData)

    for addr in qrylist:
        rec = self.query(addr)
        if rec is None:
            continue

        evt = SpiderFootEvent("RAW_RIR_DATA", str(rec), self.__name__, event)
        self.notifyListeners(evt)

        if self.checkForStop():
            return None

        if rec.get('os') is not None:
            # Notify other modules of what you've found
            evt = SpiderFootEvent("OPERATING_SYSTEM",
                                  rec.get('os') + " (" + addr + ")",
                                  self.__name__, event)
            self.notifyListeners(evt)

        if rec.get('devtype') is not None:
            # Notify other modules of what you've found
            evt = SpiderFootEvent("DEVICE_TYPE",
                                  rec.get('devtype') + " (" + addr + ")",
                                  self.__name__, event)
            self.notifyListeners(evt)

        if rec.get('country_name') is not None:
            location = ', '.join([_f for _f in [rec.get('city'),
                                                rec.get('country_name')] if _f])
            evt = SpiderFootEvent("GEOINFO", location, self.__name__, event)
            self.notifyListeners(evt)

        if 'data' in rec:
            self.sf.info("Found SHODAN data for " + eventData)
            for r in rec['data']:
                # BUG FIX: previously port = str(r.get('port')), which turns
                # a missing port into the string "None", so the None-check
                # below always passed and "addr:None" could be emitted.
                port = r.get('port')
                banner = r.get('banner')
                asn = r.get('asn')
                product = r.get('product')
                vulns = r.get('vulns')

                if port is not None:
                    # Notify other modules of what you've found
                    cp = addr + ":" + str(port)
                    evt = SpiderFootEvent("TCP_PORT_OPEN", cp,
                                          self.__name__, event)
                    self.notifyListeners(evt)

                if banner is not None:
                    # Notify other modules of what you've found
                    evt = SpiderFootEvent("TCP_PORT_OPEN_BANNER", banner,
                                          self.__name__, event)
                    self.notifyListeners(evt)

                if product is not None:
                    evt = SpiderFootEvent("SOFTWARE_USED", product,
                                          self.__name__, event)
                    self.notifyListeners(evt)

                if asn is not None:
                    evt = SpiderFootEvent("BGP_AS_MEMBER",
                                          asn.replace("AS", ""),
                                          self.__name__, event)
                    self.notifyListeners(evt)

                if vulns is not None:
                    for vuln in list(vulns.keys()):
                        evt = SpiderFootEvent('VULNERABILITY', vuln,
                                              self.__name__, event)
                        self.notifyListeners(evt)

    return None
def handleEvent(self, event):
    """Fetch files linked from event data (by extension) and extract
    document metadata, emitting RAW_FILE_META_DATA and, where a creating
    application can be identified, SOFTWARE_USED events.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    # Don't process the same URL twice.
    if eventData in self.results:
        return None
    else:
        self.results.append(eventData)

    for fileExt in self.opts['fileexts']:
        if self.checkForStop():
            return None

        if "." + fileExt.lower() in eventData.lower():
            # Fetch the file, allow much more time given that these files are
            # typically large.
            ret = self.sf.fetchUrl(eventData, timeout=self.opts['timeout'],
                                   useragent=self.opts['_useragent'],
                                   dontMangle=True, sizeLimit=10000000)
            if ret['content'] is None:
                self.sf.error("Unable to fetch file for meta analysis: " +
                              eventData, False)
                return None
            # Files smaller than 512 bytes are unlikely to be real documents.
            if len(ret['content']) < 512:
                self.sf.error("Strange content encountered, size of " +
                              str(len(ret['content'])), False)
                return None

            meta = None
            data = None
            # Based on the file extension, handle it
            if fileExt.lower() == "pdf":
                try:
                    raw = StringIO(ret['content'])
                    #data = metapdf.MetaPdfReader().read_metadata(raw)
                    pdf = PyPDF2.PdfFileReader(raw, strict=False)
                    data = pdf.getDocumentInfo()
                    meta = str(data)
                    self.sf.debug("Obtained meta data from " + eventData)
                except BaseException as e:
                    self.sf.error("Unable to parse meta data from: " +
                                  eventData + "(" + str(e) + ")", False)
                    return None

            if fileExt.lower() in ["pptx", "docx", "xlsx"]:
                try:
                    # Office Open XML: properties are read via openxmllib,
                    # guessing the MIME type from the URL.
                    mtype = mimetypes.guess_type(eventData)[0]
                    doc = openxmllib.openXmlDocument(data=ret['content'],
                                                     mime_type=mtype)
                    self.sf.debug("Office type: " + doc.mimeType)
                    data = doc.allProperties
                    meta = str(data)
                except ValueError as e:
                    self.sf.error("Unable to parse meta data from: " +
                                  eventData + "(" + str(e) + ")", False)
                    return None
                except lxml.etree.XMLSyntaxError as e:
                    self.sf.error("Unable to parse XML within: " +
                                  eventData + "(" + str(e) + ")", False)
                    return None
                except BaseException as e:
                    self.sf.error("Unable to process file: " +
                                  eventData + "(" + str(e) + ")", False)
                    return None

            if fileExt.lower() in ["jpg", "jpeg", "tiff"]:
                try:
                    # Images: pull EXIF tags; skip images with no EXIF data.
                    raw = StringIO(ret['content'])
                    data = exifread.process_file(raw)
                    if data is None or len(data) == 0:
                        continue
                    meta = str(data)
                except BaseException as e:
                    self.sf.error("Unable to parse meta data from: " +
                                  eventData + "(" + str(e) + ")", False)
                    return None

            if meta is not None and data is not None:
                evt = SpiderFootEvent("RAW_FILE_META_DATA", meta,
                                      self.__name__, event)
                self.notifyListeners(evt)

                # Try to identify the creating software from whichever
                # metadata keys are present (PDF, Office, EXIF).
                val = None
                try:
                    if "/Producer" in data:
                        val = data['/Producer']

                    # Prefer /Creator when it differs from /Producer.
                    if "/Creator" in data:
                        if "/Producer" in data:
                            if data['/Creator'] != data['/Producer']:
                                val = data['/Creator']
                        else:
                            val = data['/Creator']

                    if "Application" in data:
                        val = data['Application']

                    if "Image Software" in data:
                        val = str(data['Image Software'])
                except BaseException as e:
                    self.sf.error("Failed to parse PDF, " + eventData +
                                  ": " + str(e), False)
                    return None

                if val and not isinstance(val, PyPDF2.generic.NullObject):
                    # Strip non-ASCII
                    val = ''.join(
                        [i if ord(i) < 128 else ' ' for i in val])
                    evt = SpiderFootEvent("SOFTWARE_USED", val,
                                          self.__name__, event)
                    self.notifyListeners(evt)
def handleEvent(self, event):
    """Search grep.app for the event data, paging through results and
    emitting raw data, internal links, e-mail addresses and host names.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    if eventData in self.results:
        return None
    self.results[eventData] = True

    self.sf.debug("Received event, %s, from %s" % (eventName, srcModuleName))

    # Avoid processing our own output.
    if srcModuleName == 'sfp_grep_app':
        self.sf.debug("Ignoring " + eventData + ", from self.")
        return None

    hosts = list()

    page = 1
    per_page = 10
    pages = self.opts['max_pages']
    while page <= pages:
        if self.checkForStop():
            return None

        if self.errorState:
            return None

        res = self.query(eventData, page)
        if res is None:
            return None

        facets = res.get('facets')
        if facets is None:
            return None

        count = facets.get('count')
        if count is None:
            return None

        # Don't request more pages than the result count justifies.
        # BUG FIX: removed dead 'if last_page is None' branch -
        # math.ceil() never returns None.
        last_page = math.ceil(count / per_page)
        if last_page < pages:
            pages = last_page

        self.sf.info("Parsing page " + str(page) + " of " + str(pages))
        page += 1

        hits = res.get('hits')
        if hits is None:
            return None

        data = hits.get('hits')
        if data is None:
            return None

        for result in data:
            if result is None:
                continue

            evt = SpiderFootEvent("RAW_RIR_DATA", str(result),
                                  self.__name__, event)
            self.notifyListeners(evt)

            content = result.get('content')
            if content is None:
                continue

            snippet = content.get('snippet')
            if snippet is None:
                continue

            links = self.sf.extractUrls(
                snippet.replace('<mark>', '').replace('</mark>', ''))
            if links:
                for link in links:
                    if link in self.results:
                        continue

                    # BUG FIX: the target-match check was duplicated; the
                    # second, identical check could never fire. Check once.
                    host = self.sf.urlFQDN(link)
                    if not self.getTarget().matches(host,
                                                    includeChildren=True,
                                                    includeParents=True):
                        self.sf.debug("Skipped unrelated link: " + link)
                        continue

                    hosts.append(host)

                    self.sf.debug('Found a link: ' + link)
                    evt = SpiderFootEvent('LINKED_URL_INTERNAL', link,
                                          self.__name__, event)
                    self.notifyListeners(evt)
                    self.results[link] = True

            emails = self.sf.parseEmails(
                snippet.replace('<mark>', '').replace('</mark>', ''))
            if emails:
                for email in emails:
                    if email in self.results:
                        continue

                    mail_domain = email.lower().split('@')[1]
                    if not self.getTarget().matches(mail_domain,
                                                    includeChildren=True,
                                                    includeParents=True):
                        self.sf.debug("Skipped unrelated email address: " + email)
                        continue

                    self.sf.info("Found e-mail address: " + email)
                    # Generic accounts (info@, admin@, ...) get their own type.
                    if email.split("@")[0] in self.opts['_genericusers'].split(","):
                        evttype = "EMAILADDR_GENERIC"
                    else:
                        evttype = "EMAILADDR"

                    evt = SpiderFootEvent(evttype, email, self.__name__, event)
                    self.notifyListeners(evt)
                    self.results[email] = True

    # Emit (optionally resolved) host names collected from all pages.
    for host in set(hosts):
        if self.checkForStop():
            return None

        if self.errorState:
            return None

        if self.opts['dns_resolve'] and not self.sf.resolveHost(host):
            self.sf.debug("Host " + host + " could not be resolved")
            evt = SpiderFootEvent("INTERNET_NAME_UNRESOLVED", host,
                                  self.__name__, event)
            self.notifyListeners(evt)
            continue

        evt = SpiderFootEvent("INTERNET_NAME", host, self.__name__, event)
        self.notifyListeners(evt)

        if self.sf.isDomain(host, self.opts["_internettlds"]):
            evt = SpiderFootEvent("DOMAIN_NAME", host, self.__name__, event)
            self.notifyListeners(evt)
def handleEvent(self, event):
    """Check IP addresses (and each address of a netblock) against the
    Maltiverse blacklist, emitting MALICIOUS_* events for recent records.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    if self.errorState:
        return None

    self.sf.debug(f"Received event, {eventName}, from {srcModuleName}")

    # Don't look up stuff twice
    if eventData in self.results:
        self.sf.debug(f"Skipping {eventData}, already checked.")
        return None
    self.results[eventData] = True

    if eventName == 'NETBLOCK_OWNER':
        if not self.opts['netblocklookup']:
            return None
        # A smaller prefix length means a bigger network than permitted.
        if IPNetwork(eventData).prefixlen < self.opts['maxnetblock']:
            self.sf.debug("Network size bigger than permitted: " +
                          str(IPNetwork(eventData).prefixlen) + " > " +
                          str(self.opts['maxnetblock']))
            return None

    if eventName == 'NETBLOCK_MEMBER':
        if not self.opts['subnetlookup']:
            return None
        if IPNetwork(eventData).prefixlen < self.opts['maxsubnet']:
            self.sf.debug("Network size bigger than permitted: " +
                          str(IPNetwork(eventData).prefixlen) + " > " +
                          str(self.opts['maxsubnet']))
            return None

    qrylist = list()
    if eventName.startswith("NETBLOCK_"):
        # Expand the netblock and query every address within it.
        for ipaddr in IPNetwork(eventData):
            qrylist.append(str(ipaddr))
            self.results[str(ipaddr)] = True
    else:
        # If user has enabled affiliate checking
        if eventName == "AFFILIATE_IPADDR" and not self.opts['checkaffiliates']:
            return None
        qrylist.append(eventData)

    for addr in qrylist:
        if self.checkForStop():
            return None

        data = self.queryIPAddress(addr)

        # NOTE(review): a single failed lookup aborts the remaining
        # addresses ('break', not 'continue') - presumably deliberate to
        # stop hammering a failing API; confirm.
        if data is None:
            break

        maliciousIP = data.get('ip_addr')
        if maliciousIP is None:
            continue

        # Sanity check: response must describe the address we asked about.
        if addr != maliciousIP:
            self.sf.error("Reported address doesn't match requested, skipping",
                          False)
            continue

        blacklistedRecords = data.get('blacklist')
        if blacklistedRecords is None or len(blacklistedRecords) == 0:
            self.sf.debug("No blacklist information found for IP")
            continue

        # Data is reported about the IP Address. For netblocks, first emit
        # an IP_ADDRESS event so the findings can be parented to it.
        if eventName.startswith("NETBLOCK_"):
            ipEvt = SpiderFootEvent("IP_ADDRESS", addr, self.__name__, event)
            self.notifyListeners(ipEvt)

        if eventName.startswith("NETBLOCK_"):
            evt = SpiderFootEvent("RAW_RIR_DATA", str(data), self.__name__,
                                  ipEvt)
            self.notifyListeners(evt)
        else:
            evt = SpiderFootEvent("RAW_RIR_DATA", str(data), self.__name__,
                                  event)
            self.notifyListeners(evt)

        # Build a description from all recent-enough blacklist records.
        maliciousIPDesc = f"Maltiverse [{maliciousIP}]\n"

        for blacklistedRecord in blacklistedRecords:
            lastSeen = blacklistedRecord.get('last_seen')
            if lastSeen is None:
                continue

            try:
                lastSeenDate = datetime.strptime(str(lastSeen),
                                                 "%Y-%m-%d %H:%M:%S")
            except BaseException:
                self.sf.error("Invalid date in JSON response, skipping", False)
                continue

            # Ignore records older than the configured age limit.
            today = datetime.now()
            difference = (today - lastSeenDate).days
            if difference > int(self.opts["age_limit_days"]):
                self.sf.debug("Record found is older than age limit, skipping")
                continue

            maliciousIPDesc += " - DESCRIPTION : " + str(
                blacklistedRecord.get("description")) + "\n"

        # De-duplicate identical descriptions via their hash.
        maliciousIPDescHash = self.sf.hashstring(maliciousIPDesc)
        if maliciousIPDescHash in self.results:
            continue
        self.results[maliciousIPDescHash] = True

        if eventName.startswith("NETBLOCK_"):
            evt = SpiderFootEvent("MALICIOUS_IPADDR", maliciousIPDesc,
                                  self.__name__, ipEvt)
        elif eventName.startswith("AFFILIATE_"):
            evt = SpiderFootEvent("MALICIOUS_AFFILIATE_IPADDR",
                                  maliciousIPDesc, self.__name__, event)
        else:
            evt = SpiderFootEvent("MALICIOUS_IPADDR", maliciousIPDesc,
                                  self.__name__, event)

        self.notifyListeners(evt)

    return None
def handleEvent(self, event):
    """Query SHODAN for each IP address (or each address of a netblock)
    and emit OS, device type, open port and banner events.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    if self.errorState:
        return None

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    if self.opts['apikey'] == "":
        self.sf.error("You enabled sfp_shodan but did not set an API key!", False)
        self.errorState = True
        return None

    # Don't look up stuff twice
    if eventData in self.results:
        self.sf.debug("Skipping " + eventData + " as already mapped.")
        return None
    self.results[eventData] = True

    if eventName == 'NETBLOCK_OWNER' and self.opts['netblocklookup']:
        # A smaller prefix length means a bigger network than permitted.
        if IPNetwork(eventData).prefixlen < self.opts['maxnetblock']:
            self.sf.debug("Network size bigger than permitted: " +
                          str(IPNetwork(eventData).prefixlen) + " > " +
                          str(self.opts['maxnetblock']))
            return None

    qrylist = list()
    if eventName.startswith("NETBLOCK_"):
        # Expand the netblock and query every address within it.
        for ipaddr in IPNetwork(eventData):
            qrylist.append(str(ipaddr))
            self.results[str(ipaddr)] = True
    else:
        qrylist.append(eventData)

    for addr in qrylist:
        rec = self.query(addr)
        if rec is None:
            continue

        if self.checkForStop():
            return None

        if rec.get('os') is not None:
            # Notify other modules of what you've found
            evt = SpiderFootEvent("OPERATING_SYSTEM",
                                  rec.get('os') + " (" + addr + ")",
                                  self.__name__, event)
            self.notifyListeners(evt)

        if rec.get('devtype') is not None:
            # Notify other modules of what you've found
            evt = SpiderFootEvent("DEVICE_TYPE",
                                  rec.get('devtype') + " (" + addr + ")",
                                  self.__name__, event)
            self.notifyListeners(evt)

        if 'data' in rec:
            self.sf.info("Found SHODAN data for " + eventData)
            for r in rec['data']:
                # BUG FIX: previously port = str(r.get('port')), which turns
                # a missing port into the string "None", so the None-check
                # below always passed and "addr:None" could be emitted.
                port = r.get('port')
                banner = r.get('banner')

                if port is not None:
                    # Notify other modules of what you've found
                    cp = addr + ":" + str(port)
                    evt = SpiderFootEvent("TCP_PORT_OPEN", cp,
                                          self.__name__, event)
                    self.notifyListeners(evt)

                if banner is not None:
                    # Notify other modules of what you've found
                    evt = SpiderFootEvent("TCP_PORT_OPEN_BANNER", banner,
                                          self.__name__, event)
                    self.notifyListeners(evt)

    return None
def handleEvent(self, event):
    """Look up ARIN point-of-contact (POC) records for domains and human
    names, emitting possible full names and fetching raw contact records."""
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data
    self.currentEventSrc = event

    self.sf.debug("Received event, %s, from %s" % (eventName, srcModuleName))

    # Don't look up stuff twice
    if eventData in self.results:
        self.sf.debug("Skipping " + eventData + " as already mapped.")
        return None
    self.results[eventData] = True

    if eventName == "DOMAIN_NAME":
        ret = self.query("domain", eventData)
        if not ret:
            return None

        if "pocs" in ret and "pocRef" in ret['pocs']:
            pocref = ret['pocs']['pocRef']
            # Might be a list or a dictionary
            refs = [pocref] if type(pocref) == dict else pocref

            for poc in refs:
                fullname = poc['@name']
                # Reorder "Last, First" into "First Last".
                if "," in fullname:
                    last, rest = fullname.split(", ", 1)
                    fullname = rest + " " + last

                # A bit of a hack. The reason we do this is because the
                # names are separated in the content and sfp_names won't
                # recognise it. So we submit this and see if it really is
                # considered a name.
                evt = SpiderFootEvent("RAW_RIR_DATA",
                                      "Possible full name: " + fullname,
                                      self.__name__, self.currentEventSrc)
                self.notifyListeners(evt)

                # We just want the raw data so we can get potential
                # e-mail addresses.
                self.query("contact", poc['$'])

    if eventName == "HUMAN_NAME":
        ret = self.query("name", eventData)
        if not ret:
            return None

        if "pocs" in ret and "pocRef" in ret['pocs']:
            pocref = ret['pocs']['pocRef']
            # Might be a list or a dictionary
            refs = [pocref] if type(pocref) == dict else pocref

            for poc in refs:
                # We just want the raw data so we can get potential
                # e-mail addresses.
                self.query("contact", poc['$'])
def handleEvent(self, event):
    """Extract likely human names from event data.

    E-mail local parts of the form "first.last" are converted directly;
    free text is scanned for capitalised word pairs which are scored
    against a dictionary (self.d) and a list of common first names
    (self.n), emitting HUMAN_NAME above the configured score threshold.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    # If the source event is web content, check if the source URL was javascript
    # or CSS, in which case optionally ignore it.
    if eventName == "TARGET_WEB_CONTENT":
        url = event.actualSource
        if url is not None:
            if self.opts['filterjscss'] and (".js" in url or ".css" in url):
                self.sf.debug("Ignoring web content from CSS/JS.")
                return None

    if eventName == "EMAILADDR" and self.opts['emailtoname']:
        if "." in eventData.split("@")[0]:
            # BUG FIX: the former str/unicode branches were byte-identical;
            # collapsed into one.
            name = " ".join(
                map(str.capitalize, eventData.split("@")[0].split(".")))
            name = str(name)

            # Names don't have numbers.
            # BUG FIX: re.match("[0-9]*", name) matches the empty string at
            # the start of ANY name, so every e-mail-derived name was
            # discarded. Search for an actual digit instead.
            if re.search(r"[0-9]", name):
                return None

            # Notify other modules of what you've found
            evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
            if event.moduleDataSource:
                evt.moduleDataSource = event.moduleDataSource
            else:
                evt.moduleDataSource = "Unknown"
            self.notifyListeners(evt)
        return None

    # Stage 1: Find things that look (very vaguely) like names
    rx = re.compile(
        "([A-Z][a-z�������������]+)\s+.?.?\s?([A-Z][�������������a-zA-Z\'\-]+)"
    )
    m = re.findall(rx, eventData)
    for r in m:
        # Start off each match as 0 points.
        p = 0
        notindict = False

        # Shouldn't encounter "Firstname's Secondname"
        first = r[0].lower()
        if first[len(first) - 2] == "'" or first[len(first) - 1] == "'":
            continue

        # Strip off trailing ' or 's
        secondOrig = r[1].replace("'s", "")
        secondOrig = secondOrig.rstrip("'")
        second = r[1].lower().replace("'s", "")
        second = second.rstrip("'")

        # If both words are not in the dictionary, add 75 points.
        if first not in self.d and second not in self.d:
            self.sf.debug(
                "Both first and second names are not in the dictionary, so high chance of name: ("
                + first + ":" + second + ").")
            p += 75
            notindict = True
        else:
            self.sf.debug(first + " was found or " + second +
                          " was found in dictionary.")

        # If the first word is a known popular first name, award 50 points.
        if first in self.n:
            p += 50

        # If either word is 2 characters, subtract 50 points.
        if len(first) == 2 or len(second) == 2:
            p -= 50

        if not notindict:
            # If the first word is in the dictionary but the second isn't,
            # subtract 20 points.
            if first in self.d and second not in self.d:
                p -= 20

            # If the second word is in the dictionary but the first isn't,
            # subtract 40 points.
            if first not in self.d and second in self.d:
                p -= 40

        name = r[0] + " " + secondOrig

        self.sf.debug("Name of " + name + " has score: " + str(p))
        if p > self.opts['algolimit']:
            # Notify other modules of what you've found
            evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
            if event.moduleDataSource:
                evt.moduleDataSource = event.moduleDataSource
            else:
                evt.moduleDataSource = "Unknown"
            self.notifyListeners(evt)
def handleEvent(self, event):
    """Check IPs, internet names and co-hosted sites against VirusTotal's
    detected-URL data, emitting the appropriate MALICIOUS_* event type.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    if self.opts['apikey'] == "":
        self.sf.error(
            "You enabled sfp_virustotal but did not set an API key!", False)
        return None

    # Don't look up stuff twice
    # BUG FIX: dict.has_key() was removed in Python 3; use 'in' instead.
    if eventData in self.results:
        self.sf.debug("Skipping " + eventData + " as already mapped.")
        return None
    self.results[eventData] = True

    if eventName.startswith("AFFILIATE") and not self.opts['checkaffiliates']:
        return None

    if eventName == 'CO_HOSTED_SITE' and not self.opts['checkcohosts']:
        return None

    if eventName == 'NETBLOCK_OWNER' and self.opts['netblocklookup']:
        # A smaller prefix length means a bigger network than permitted.
        if IPNetwork(eventData).prefixlen < self.opts['maxnetblock']:
            self.sf.debug("Network size bigger than permitted: " +
                          str(IPNetwork(eventData).prefixlen) + " > " +
                          str(self.opts['maxnetblock']))
            return None

    if eventName == 'NETBLOCK_MEMBER' and self.opts['subnetlookup']:
        if IPNetwork(eventData).prefixlen < self.opts['maxsubnet']:
            self.sf.debug("Network size bigger than permitted: " +
                          str(IPNetwork(eventData).prefixlen) + " > " +
                          str(self.opts['maxsubnet']))
            return None

    qrylist = list()
    if eventName.startswith("NETBLOCK_"):
        # Expand the netblock and query every address within it.
        for ipaddr in IPNetwork(eventData):
            qrylist.append(str(ipaddr))
            self.results[str(ipaddr)] = True
    else:
        qrylist.append(eventData)

    for addr in qrylist:
        if self.checkForStop():
            return None

        info = self.query(addr)
        # BUG FIX: compare against None with 'is', not '=='.
        if info is None:
            continue

        # BUG FIX: has_key() replaced with 'in' (Python 3 compatible).
        if 'detected_urls' in info:
            self.sf.info("Found VirusTotal URL data for " + addr)

            # Map the source event type to the malicious event type and
            # the VirusTotal report URL path component.
            if eventName in ["IP_ADDRESS"] or eventName.startswith("NETBLOCK_"):
                evt = "MALICIOUS_IPADDR"
                infotype = "ip-address"
            if eventName == "AFFILIATE_IPADDR":
                evt = "MALICIOUS_AFFILIATE_IPADDR"
                infotype = "ip-address"
            if eventName == "INTERNET_NAME":
                evt = "MALICIOUS_INTERNET_NAME"
                infotype = "domain"
            if eventName == "AFFILIATE_INTERNET_NAME":
                evt = "MALICIOUS_AFFILIATE_INTERNET_NAME"
                infotype = "domain"
            if eventName == "CO_HOSTED_SITE":
                evt = "MALICIOUS_COHOST"
                infotype = "domain"

            infourl = "<SFURL>https://www.virustotal.com/en/" + infotype + \
                      "/" + addr + "/information/</SFURL>"

            # Notify other modules of what you've found
            e = SpiderFootEvent(evt, "VirusTotal [" + addr + "]\n" +
                                infourl, self.__name__, event)
            self.notifyListeners(e)
def handleEvent(self, event):
    """Search Google/Yahoo/Bing (per the 'method' option) for social media
    profiles matching the event data, emitting SOCIAL_MEDIA events and the
    raw search content for analysis.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data
    self.currentEventSrc = event

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    # Don't look up stuff twice
    if eventData in self.results:
        self.sf.debug("Skipping " + eventData + " as already mapped.")
        return None
    self.results.append(eventData)

    # Lazily compute target keywords used for "tighten" verification.
    if self.keywords is None:
        self.keywords = self.sf.domainKeywords(self.getTarget().getNames(),
                                               self.opts['_internettlds'])

    for site in sites.keys():
        s = unicode(sites[site][0]).format(eventData)
        searchStr = s.replace(" ", "%20")
        searchDom = sites[site][1]

        # BUG FIX: 'results' was only assigned when the 'method' option
        # matched google/yahoo/bing; any other value raised a NameError
        # (or silently reused the previous iteration's results).
        results = None

        if self.opts['method'].lower() == "google":
            results = self.sf.googleIterate(
                searchStr,
                dict(limit=self.opts['pages'],
                     useragent=self.opts['_useragent'],
                     timeout=self.opts['_fetchtimeout']))

        if self.opts['method'].lower() == "yahoo":
            results = self.sf.yahooIterate(
                searchStr,
                dict(limit=self.opts['pages'],
                     useragent=self.opts['_useragent'],
                     timeout=self.opts['_fetchtimeout']))

        if self.opts['method'].lower() == "bing":
            results = self.sf.bingIterate(
                searchStr,
                dict(limit=self.opts['pages'],
                     useragent=self.opts['_useragent'],
                     timeout=self.opts['_fetchtimeout']))

        if results is None:
            self.sf.info("No data returned from " + self.opts['method'] + ".")
            return None

        if self.checkForStop():
            return None

        # Pause between sites to avoid search-engine rate limiting.
        pauseSecs = random.randint(4, 15)
        self.sf.debug("Pausing for " + str(pauseSecs))
        time.sleep(pauseSecs)

        for key in results.keys():
            instances = list()

            # Yahoo requires some additional parsing
            if self.opts['method'].lower() == "yahoo":
                res = re.sub("RU=(.[^\/]+)\/RK=", self.yahooCleaner,
                             results[key], 0)
            else:
                res = results[key]

            matches = re.findall(searchDom, res, re.IGNORECASE)
            if matches is not None:
                for match in matches:
                    if match in instances:
                        continue
                    instances.append(match)

                    if self.checkForStop():
                        return None

                    # Fetch the profile page if we are checking
                    # for a firm relationship.
                    if self.opts['tighten']:
                        pres = self.sf.fetchUrl(
                            match,
                            timeout=self.opts['_fetchtimeout'],
                            useragent=self.opts['_useragent'])
                        if pres['content'] is None:
                            continue

                        # Require a target keyword in the profile content.
                        found = False
                        for kw in self.keywords:
                            if re.search(
                                    "[^a-zA-Z\-\_]" + kw + "[^a-zA-Z\-\_]",
                                    pres['content'], re.IGNORECASE):
                                found = True
                        if not found:
                            continue

                    self.sf.info("Social Media Profile found at " + site +
                                 ": " + match)
                    evt = SpiderFootEvent("SOCIAL_MEDIA",
                                          site + ": " + match,
                                          self.__name__, event)
                    self.notifyListeners(evt)

            # Submit the bing results for analysis
            evt = SpiderFootEvent("SEARCH_ENGINE_WEB_CONTENT", res,
                                  self.__name__, event)
            self.notifyListeners(evt)
def handleEvent(self, event):
    """Look up PGP keys for the event data and emit names, e-mail
    addresses and (optionally) the retrieved keys themselves."""
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    # Avoid processing our own output.
    if srcModuleName == 'sfp_peegeepee':
        self.sf.debug("Ignoring " + eventName + ", from self.")
        return None

    if eventData in self.results:
        self.sf.debug("Skipping " + eventData + ", already checked.")
        return None
    self.results[eventData] = True

    keys = self.query(eventData)
    if not keys:
        self.sf.debug('No results found for ' + eventData)
        return None

    names = list()
    emails = list()

    for key in keys:
        record = keys.get(key)
        owner_name = record[0]
        owner_email = record[1]

        if not owner_email:
            continue

        # Get e-mail addresses on this domain
        if eventName in ('DOMAIN_NAME', 'INTERNET_NAME'):
            mail_domain = owner_email.lower().split('@')[1]
            if not self.getTarget().matches(mail_domain):
                continue

        # Retrieve names for the specified e-mail address
        if eventName == 'EMAILADDR':
            if owner_email.lower() != eventData.lower():
                continue

        emails.append(owner_email)
        names.append(owner_name)

    for owner_name in set(names):
        # A bit of a hack. Submit the description to sfp_names
        # and see if it is considered to be a name.
        evt = SpiderFootEvent('RAW_RIR_DATA',
                              'Possible full name: ' + owner_name,
                              self.__name__, event)
        self.notifyListeners(evt)

    for owner_email in set(emails):
        evt = SpiderFootEvent('EMAILADDR', owner_email, self.__name__, event)
        self.notifyListeners(evt)

    # Optionally fetch the actual key material.
    if self.opts['fetch_keys']:
        for key in keys:
            self.retrieveKey(key, event)

    return None
def handleEvent(self, event):
    """Process urlscan.io search results for the event data, emitting
    internal URLs, geo info, related domains, ASNs and server banners.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    if self.errorState:
        return None

    self.sf.debug("Received event, %s, from %s" % (eventName, srcModuleName))

    # Don't look up stuff twice
    if eventData in self.results:
        self.sf.debug("Skipping " + eventData + " as already mapped.")
        return None
    self.results[eventData] = True

    data = self.query(eventData)
    if data is None:
        return None

    results = data.get('results')
    if not results:
        return None

    evt = SpiderFootEvent('RAW_RIR_DATA', str(results), self.__name__, event)
    self.notifyListeners(evt)

    urls = list()
    asns = list()
    domains = list()
    locations = list()
    servers = list()

    for res in results:
        page = res.get('page')
        if not page:
            continue

        domain = page.get('domain')
        if not domain:
            continue
        if not self.getTarget().matches(domain, includeParents=True):
            continue
        if domain.lower() != eventData.lower():
            domains.append(domain)

        asn = page.get('asn')
        if asn:
            asns.append(asn.replace('AS', ''))

        location = ', '.join(
            [_f for _f in [page.get('city'), page.get('country')] if _f])
        if location:
            locations.append(location)

        server = page.get('server')
        if server:
            servers.append(server)

        task = res.get('task')
        if not task:
            continue

        url = task.get('url')
        # BUG FIX: guard against a missing 'url' key before passing the
        # value to urlFQDN(), which would otherwise choke on None.
        if not url:
            continue
        if self.getTarget().matches(self.sf.urlFQDN(url),
                                    includeParents=True):
            urls.append(url)

    for url in set(urls):
        evt = SpiderFootEvent('LINKED_URL_INTERNAL', url, self.__name__, event)
        self.notifyListeners(evt)

    for location in set(locations):
        evt = SpiderFootEvent('GEOINFO', location, self.__name__, event)
        self.notifyListeners(evt)

    if self.opts['verify'] and len(domains) > 0:
        self.sf.info("Resolving " + str(len(set(domains))) + " domains ...")

        for domain in set(domains):
            # Note: the outer condition already checked self.opts['verify'],
            # so the former redundant re-check was dropped.
            if not self.sf.resolveHost(domain):
                evt = SpiderFootEvent('INTERNET_NAME_UNRESOLVED', domain,
                                      self.__name__, event)
                self.notifyListeners(evt)
            else:
                evt = SpiderFootEvent('INTERNET_NAME', domain,
                                      self.__name__, event)
                self.notifyListeners(evt)
                if self.sf.isDomain(domain, self.opts['_internettlds']):
                    evt = SpiderFootEvent('DOMAIN_NAME', domain,
                                          self.__name__, event)
                    self.notifyListeners(evt)

    for asn in set(asns):
        evt = SpiderFootEvent('BGP_AS_MEMBER', asn, self.__name__, event)
        self.notifyListeners(evt)

    for server in set(servers):
        evt = SpiderFootEvent('WEBSERVER_BANNER', server, self.__name__, event)
        self.notifyListeners(evt)

    return None
def handleEvent(self, event):
    """Process FringeProject search results: emit raw data, internal URLs,
    open ports (from 'port:N' tags), host names and domains.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    if self.errorState:
        return None

    self.sf.debug(f"Received event, {eventName}, from {srcModuleName}")

    # Avoid processing our own output.
    if srcModuleName == 'sfp_fringeproject':
        self.sf.debug("Ignoring " + eventData + ", from self.")
        return None

    if eventData in self.results:
        self.sf.debug(f"Skipping {eventData}, already checked.")
        return None
    self.results[eventData] = True

    data = self.query(eventData)
    if not data:
        self.sf.info("No results found for " + eventData)
        return None

    e = SpiderFootEvent('RAW_RIR_DATA', str(data), self.__name__, event)
    self.notifyListeners(e)

    hosts = list()

    for result in data:
        data_type = result.get('type')
        if data_type not in ['url', 'hostname']:
            # BUG FIX: concatenating a missing (None) type with '+' raised
            # a TypeError; format via f-string instead.
            self.sf.debug(f'Unknown result data type: {data_type}')
            continue

        value = result.get('value')
        if not value:
            continue

        if data_type == 'hostname':
            if not self.getTarget().matches(value, includeChildren=True,
                                            includeParents=True):
                continue
            hosts.append(value)

        if data_type == 'url':
            host = self.sf.urlFQDN(value.lower())
            if not self.getTarget().matches(host, includeChildren=True,
                                            includeParents=True):
                continue
            hosts.append(host)

            evt = SpiderFootEvent('LINKED_URL_INTERNAL', value,
                                  self.__name__, event)
            self.notifyListeners(evt)

        tags = result.get('tags')
        if not tags:
            continue

        # Tags of the form "port:N" indicate open ports on the host.
        for tag in tags:
            try:
                port = re.findall(r'^port:([0-9]+)', tag)
            except BaseException:
                self.sf.debug("Didn't get sane data from FringeProject.")
                continue

            if len(port) > 0:
                evt = SpiderFootEvent('TCP_PORT_OPEN',
                                      value + ':' + str(port[0]),
                                      self.__name__, event)
                self.notifyListeners(evt)

    for host in set(hosts):
        evt = SpiderFootEvent('INTERNET_NAME', host, self.__name__, event)
        self.notifyListeners(evt)

        if self.sf.isDomain(host, self.opts['_internettlds']):
            evt = SpiderFootEvent('DOMAIN_NAME', host, self.__name__, event)
            self.notifyListeners(evt)
def handleEvent(self, event):
    """Query the DuckDuckGo Instant Answer API for the event data and emit
    search-engine content, abstract description and category events.

    BUGFIXES:
    - The query term is now URL-encoded; previously it was concatenated raw
      into the query string, so characters like '&', '#' or spaces could
      corrupt the request.
    - ret['Heading'] is accessed via .get() so a response without that key
      no longer raises KeyError.
    """
    import urllib.parse  # local import: keeps this fix self-contained

    eventName = event.eventType
    eventData = event.data

    # For affiliates, optionally look up the parent domain instead.
    if self.opts['affiliatedomains'] and "AFFILIATE_" in eventName:
        eventData = self.sf.hostDomain(eventData, self.opts['_internettlds'])
        if not eventData:
            return None

    if eventData in self.results:
        self.sf.debug(f"Skipping {eventData}, already checked.")
        return None

    self.results[eventData] = True

    url = "https://api.duckduckgo.com/?q=" + urllib.parse.quote(eventData) \
        + "&format=json&pretty=1"
    res = self.sf.fetchUrl(url, timeout=self.opts['_fetchtimeout'],
                           useragent="SpiderFoot")

    if res['content'] is None:
        self.sf.error(f"Unable to fetch {url}", False)
        return None

    try:
        ret = json.loads(res['content'])
    except BaseException as e:
        self.sf.error(f"Error processing JSON response from DuckDuckGo: {e}", False)
        return None

    if not ret.get('Heading'):
        self.sf.debug(f"No DuckDuckGo information for {eventData}")
        return None

    # Submit the DuckDuckGo results for analysis
    evt = SpiderFootEvent("SEARCH_ENGINE_WEB_CONTENT", res['content'],
                          self.__name__, event)
    self.notifyListeners(evt)

    abstract_text = ret.get('AbstractText')
    if abstract_text:
        event_type = "DESCRIPTION_ABSTRACT"
        if "AFFILIATE" in eventName:
            event_type = "AFFILIATE_" + event_type
        evt = SpiderFootEvent(event_type, str(abstract_text), self.__name__, event)
        self.notifyListeners(evt)

    related_topics = ret.get('RelatedTopics')
    if related_topics:
        event_type = "DESCRIPTION_CATEGORY"
        if "AFFILIATE" in eventName:
            event_type = "AFFILIATE_" + event_type
        for topic in related_topics:
            if not isinstance(topic, dict):
                self.sf.debug("No category text found from DuckDuckGo.")
                continue
            category = topic.get('Text')
            if not category:
                self.sf.debug("No category text found from DuckDuckGo.")
                continue
            evt = SpiderFootEvent(event_type, category, self.__name__, event)
            self.notifyListeners(evt)
def handleEvent(self, event):
    """Scan textual event data for likely company names (up to three
    capitalized words followed by a corporate suffix such as Inc., Ltd.,
    GmbH) and emit COMPANY_NAME / AFFILIATE_COMPANY_NAME events.

    BUGFIXES:
    - Both suffix lists were missing a comma after the N.V. entry, so
      Python's implicit string concatenation fused it with 'PLC'
      ('N\\.V\\.?PLC'), silently breaking detection of both suffixes.
    - The original type check referenced `unicode`, which does not exist on
      Python 3 (this file uses f-strings elsewhere, so it runs on Python 3
      and that branch would raise NameError); replaced with isinstance(str).
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    # Various ways to identify companies in text
    # Support up to three word company names with each starting with
    # a capital letter, allowing for hyphens brackets and numbers within.
    pattern_prefix = "(?=[,;:\'\">\(= ]|^)\s?([A-Z0-9\(\)][A-Za-z0-9\-&,\.][^ \"\';:><]*)?\s?([A-Z0-9\(\)][A-Za-z0-9\-&,\.]?[^ \"\';:><]*|[Aa]nd)?\s?([A-Z0-9\(\)][A-Za-z0-9\-&,\.]?[^ \"\';:><]*)?\s+"
    pattern_match_re = [
        'LLC', 'L\.L\.C\.?', 'AG', 'A\.G\.?', 'GmbH', 'Pty\.?\s+Ltd\.?',
        'Ltd\.?', 'Pte\.?', 'Inc\.?', 'INC\.?', 'Incorporated', 'Foundation',
        'Corp\.?', 'Corporation', 'SA', 'S\.A\.?', 'SIA', 'BV', 'B\.V\.?',
        'NV', 'N\.V\.?',  # comma restored (was fused with 'PLC')
        'PLC', 'Limited', 'Pvt\.?\s+Ltd\.?', 'SARL']
    pattern_match = [
        'LLC', 'L.L.C', 'AG', 'A.G', 'GmbH', 'Pty', 'Ltd', 'Pte', 'Inc',
        'INC', 'Foundation', 'Corp', 'SA', 'S.A', 'SIA', 'BV', 'B.V',
        'NV', 'N.V',  # comma restored (was fused with 'PLC')
        'PLC', 'Limited', 'Pvt.', 'SARL']
    pattern_suffix = "(?=[ \.,:<\)\'\"]|[$\n\r])"

    # Filter out anything from the company name which matches the below
    filterpatterns = [
        "Copyright",
        "\d{4}"  # To catch years
    ]

    # Don't re-parse company names
    if eventName in ["COMPANY_NAME", "AFFILIATE_COMPANY_NAME"]:
        return None

    if eventName == "TARGET_WEB_CONTENT":
        url = event.sourceEvent.data
        if self.opts['filterjscss'] and (".js" in url or ".css" in url):
            self.sf.debug("Ignoring web content from CSS/JS.")
            return None

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName
                  + ": " + str(len(eventData)) + " bytes.")

    if not isinstance(eventData, str):
        # Lists/dicts are stringified so their contents can still be scanned.
        if isinstance(eventData, (list, dict)):
            eventData = str(eventData)
        else:
            self.sf.debug("Unhandled type to find company names: "
                          + str(type(eventData)))
            return None

    # Strip out everything before the O=
    try:
        if eventName == "SSL_CERTIFICATE_ISSUED":
            eventData = eventData.split("O=")[1]
    except BaseException:
        self.sf.debug("Couldn't strip out O=, proceeding anyway...")

    # Find chunks of text containing what might be a company name first.
    # This is to avoid running very expensive regexps on large chunks of
    # data.
    chunks = list()
    for pat in pattern_match:
        start = 0
        m = eventData.find(pat, start)
        while m > 0:
            start = m - 50
            if start < 0:
                start = 0
            end = m + 10
            if end >= len(eventData):
                end = len(eventData) - 1
            chunks.append(eventData[start:end])
            offset = m + len(pat)
            m = eventData.find(pat, offset)

    myres = list()
    for chunk in chunks:
        for pat in pattern_match_re:
            matches = re.findall(pattern_prefix + "(" + pat + ")" + pattern_suffix,
                                 chunk, re.MULTILINE | re.DOTALL)
            for match in matches:
                # Require at least two non-empty capture groups (a name word
                # plus the suffix), otherwise it's just a stray suffix.
                matched = 0
                for m in match:
                    if len(m) > 0:
                        matched += 1
                if matched <= 1:
                    continue

                fullcompany = ""
                for m in match:
                    flt = False
                    for f in filterpatterns:
                        if re.match(f, m):
                            flt = True
                    if not flt:
                        fullcompany += m + " "

                fullcompany = re.sub("\s+", " ", fullcompany.strip())

                self.sf.info("Found company name: " + fullcompany)

                if fullcompany in myres:
                    self.sf.debug("Already found from this source.")
                    continue
                else:
                    myres.append(fullcompany)

                if "AFFILIATE_" in eventName:
                    etype = "AFFILIATE_COMPANY_NAME"
                else:
                    etype = "COMPANY_NAME"

                evt = SpiderFootEvent(etype, fullcompany, self.__name__, event)
                if event.moduleDataSource:
                    evt.moduleDataSource = event.moduleDataSource
                else:
                    evt.moduleDataSource = "Unknown"
                self.notifyListeners(evt)
def __startScan(self):
    """Start running a scan.

    Loads and configures every requested module, wires modules up as
    listeners of each other, fires the ROOT and initial target events, and
    records the final scan status (FINISHED/ABORTED/ERROR-FAILED).
    """
    aborted = False

    self.__setStatus("STARTING", time.time() * 1000, None)
    self.__sf.status(f"Scan [{self.__scanId}] initiated.")

    try:
        # moduleList = list of modules the user wants to run
        for modName in self.__moduleList:
            if modName == '':
                continue

            try:
                module = __import__('modules.' + modName, globals(), locals(), [modName])
            except ImportError:
                self.__sf.error("Failed to load module: " + modName, False)
                continue

            # Each module file defines a class with the same name as the file.
            mod = getattr(module, modName)()
            mod.__name__ = modName

            # Module may have been renamed or removed
            if modName not in self.__config['__modules__']:
                continue

            # Set up the module
            # Configuration is a combined global config with module-specific options
            self.__modconfig[modName] = deepcopy(self.__config['__modules__'][modName]['opts'])
            for opt in list(self.__config.keys()):
                self.__modconfig[modName][opt] = deepcopy(self.__config[opt])

            mod.clearListeners()  # clear any listener relationships from the past
            mod.setup(self.__sf, self.__modconfig[modName])
            mod.setDbh(self.__dbh)
            mod.setScanId(self.__scanId)

            # Give modules a chance to 'enrich' the original target with
            # aliases of that target.
            newTarget = mod.enrichTarget(self.__target)
            if newTarget is not None:
                self.__target = newTarget
            self.__moduleInstances[modName] = mod

            # Override the module's local socket module
            # to be the SOCKS one.
            if self.__config['_socks1type'] != '':
                mod._updateSocket(socket)

            # Set up event output filters if requested
            if self.__config['__outputfilter']:
                mod.setOutputFilter(self.__config['__outputfilter'])

            self.__sf.status(modName + " module loaded.")

        # Register listener modules and then start all modules sequentially
        for module in list(self.__moduleInstances.values()):
            # Register the target with the module
            module.setTarget(self.__target)

            for listenerModule in list(self.__moduleInstances.values()):
                # Careful not to register twice or you will get duplicate events
                if listenerModule in module._listenerModules:
                    continue
                # Note the absence of a check for whether a module can register
                # to itself. That is intentional because some modules will
                # act on their own notifications (e.g. sfp_dns)!
                if listenerModule.watchedEvents() is not None:
                    module.registerListener(listenerModule)

        # Now we are ready to roll..
        self.__setStatus("RUNNING")

        # Create a pseudo module for the root event to originate from
        psMod = SpiderFootPlugin()
        psMod.__name__ = "SpiderFoot UI"
        psMod.setTarget(self.__target)
        psMod.setDbh(self.__dbh)
        psMod.clearListeners()
        for mod in list(self.__moduleInstances.values()):
            if mod.watchedEvents() is not None:
                psMod.registerListener(mod)

        # Create the "ROOT" event which un-triggered modules will link events to
        rootEvent = SpiderFootEvent("ROOT", self.__targetValue, "", None)
        psMod.notifyListeners(rootEvent)
        firstEvent = SpiderFootEvent(self.__targetType, self.__targetValue,
                                     "SpiderFoot UI", rootEvent)
        psMod.notifyListeners(firstEvent)

        # Special case.. check if an INTERNET_NAME is also a domain
        if self.__targetType == 'INTERNET_NAME':
            if self.__sf.isDomain(self.__targetValue, self.__config['_internettlds']):
                firstEvent = SpiderFootEvent('DOMAIN_NAME', self.__targetValue,
                                             "SpiderFoot UI", rootEvent)
                psMod.notifyListeners(firstEvent)

        # If in interactive mode, loop through this shared global variable
        # waiting for inputs, and process them until my status is set to
        # FINISHED.
        # Check in case the user requested to stop the scan between modules
        # initializing
        for module in list(self.__moduleInstances.values()):
            if module.checkForStop():
                self.__setStatus('ABORTING')
                aborted = True
                break

        if aborted:
            self.__sf.status(f"Scan [{self.__scanId}] aborted.")
            self.__setStatus("ABORTED", None, time.time() * 1000)
        else:
            self.__sf.status(f"Scan [{self.__scanId}] completed.")
            self.__setStatus("FINISHED", None, time.time() * 1000)
    except BaseException as e:
        # Catch-all so any module failure is recorded as a failed scan
        # rather than silently killing the thread.
        exc_type, exc_value, exc_traceback = sys.exc_info()
        self.__sf.error(f"Unhandled exception ({e.__class__.__name__}) encountered during scan."
                        + "Please report this as a bug: "
                        + repr(traceback.format_exception(exc_type, exc_value, exc_traceback)), False)
        self.__sf.status(f"Scan [{self.__scanId}] failed: {e}")
        self.__setStatus("ERROR-FAILED", None, time.time() * 1000)

    self.__dbh.close()
def handleEvent(self, event):
    """Look up IPs, names, netblocks and co-hosts against VirusTotal and emit
    MALICIOUS_* events for hits, plus sibling domains and subdomains.
    Requires the 'api_key' option; sets errorState if it is missing.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    if self.errorState:
        return None

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    if self.opts['api_key'] == "":
        self.sf.error(
            "You enabled sfp_virustotal but did not set an API key!", False)
        self.errorState = True
        return None

    # Don't look up stuff twice
    if eventData in self.results:
        self.sf.debug("Skipping " + eventData + " as already mapped.")
        return None
    else:
        self.results[eventData] = True

    if eventName.startswith("AFFILIATE") and not self.opts['checkaffiliates']:
        return None

    if eventName == 'CO_HOSTED_SITE' and not self.opts['checkcohosts']:
        return None

    # Note: a SMALLER prefixlen means a BIGGER network, hence the '<'.
    if eventName == 'NETBLOCK_OWNER':
        if not self.opts['netblocklookup']:
            return None
        else:
            if IPNetwork(eventData).prefixlen < self.opts['maxnetblock']:
                self.sf.debug("Network size bigger than permitted: "
                              + str(IPNetwork(eventData).prefixlen) + " > "
                              + str(self.opts['maxnetblock']))
                return None

    if eventName == 'NETBLOCK_MEMBER':
        if not self.opts['subnetlookup']:
            return None
        else:
            if IPNetwork(eventData).prefixlen < self.opts['maxsubnet']:
                self.sf.debug("Network size bigger than permitted: "
                              + str(IPNetwork(eventData).prefixlen) + " > "
                              + str(self.opts['maxsubnet']))
                return None

    # Netblocks are expanded into their individual addresses.
    qrylist = list()
    if eventName.startswith("NETBLOCK_"):
        for ipaddr in IPNetwork(eventData):
            qrylist.append(str(ipaddr))
            self.results[str(ipaddr)] = True
    else:
        qrylist.append(eventData)

    for addr in qrylist:
        if self.checkForStop():
            return None

        info = self.query(addr)
        if info is None:
            continue

        if len(info.get('detected_urls', [])) > 0:
            self.sf.info("Found VirusTotal URL data for " + addr)
            # Map the source event type to the malicious event type and the
            # path component of the VirusTotal info URL.
            if eventName in ["IP_ADDRESS"] or eventName.startswith("NETBLOCK_"):
                evt = "MALICIOUS_IPADDR"
                infotype = "ip-address"
            if eventName == "AFFILIATE_IPADDR":
                evt = "MALICIOUS_AFFILIATE_IPADDR"
                infotype = "ip-address"
            if eventName == "INTERNET_NAME":
                evt = "MALICIOUS_INTERNET_NAME"
                infotype = "domain"
            if eventName == "AFFILIATE_INTERNET_NAME":
                evt = "MALICIOUS_AFFILIATE_INTERNET_NAME"
                infotype = "domain"
            if eventName == "CO_HOSTED_SITE":
                evt = "MALICIOUS_COHOST"
                infotype = "domain"

            infourl = "<SFURL>https://www.virustotal.com/en/" + infotype + "/" + \
                addr + "/information/</SFURL>"

            # Notify other modules of what you've found
            e = SpiderFootEvent(evt, "VirusTotal [" + addr + "]\n" + infourl,
                                self.__name__, event)
            self.notifyListeners(e)

        # Treat siblings as affiliates if they are of the original target, otherwise
        # they are additional hosts within the target.
        if 'domain_siblings' in info:
            if eventName in ["IP_ADDRESS", "INTERNET_NAME"]:
                for s in info['domain_siblings']:
                    if self.getTarget().matches(s):
                        if s not in self.results:
                            # NOTE(review): when 'verify' is disabled no event
                            # is emitted for in-target siblings — confirm this
                            # is the intended behavior.
                            if self.opts['verify']:
                                if not self.sf.resolveHost(s):
                                    e = SpiderFootEvent("INTERNET_NAME_UNRESOLVED", s,
                                                        self.__name__, event)
                                    self.notifyListeners(e)
                                else:
                                    e = SpiderFootEvent("INTERNET_NAME", s,
                                                        self.__name__, event)
                                    self.notifyListeners(e)
                                    if self.sf.isDomain(s, self.opts['_internettlds']):
                                        e = SpiderFootEvent("DOMAIN_NAME", s,
                                                            self.__name__, event)
                                        self.notifyListeners(e)
                    else:
                        if s not in self.results:
                            e = SpiderFootEvent("AFFILIATE_INTERNET_NAME", s,
                                                self.__name__, event)
                            self.notifyListeners(e)

        if 'subdomains' in info and eventName == "INTERNET_NAME":
            for n in info['subdomains']:
                if n not in self.results:
                    # NOTE(review): same pattern as above — nothing is emitted
                    # when 'verify' is disabled.
                    if self.opts['verify']:
                        if not self.sf.resolveHost(n):
                            e = SpiderFootEvent("INTERNET_NAME_UNRESOLVED", n,
                                                self.__name__, event)
                            self.notifyListeners(e)
                        else:
                            e = SpiderFootEvent("INTERNET_NAME", n,
                                                self.__name__, event)
                            self.notifyListeners(e)
                            if self.sf.isDomain(n, self.opts['_internettlds']):
                                e = SpiderFootEvent("DOMAIN_NAME", n,
                                                    self.__name__, event)
                                self.notifyListeners(e)
def handleEvent(self, event):
    """Search GitHub for repositories and users matching a keyword derived
    from the event data, emitting PUBLIC_CODE_REPO events.

    FIXES:
    - `== None` / `!= None` comparisons replaced with `is (not) None`.
    - json.loads() calls on user-search and repo responses are now guarded,
      so invalid JSON from the API no longer crashes the handler.
    - `name` is guarded before use; previously an unresolvable keyword
      (e.g. domainKeyword returning None) caused a TypeError.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    if eventData in self.results:
        self.sf.debug("Already did a search for " + eventData + ", skipping.")
        return None
    else:
        self.results.append(eventData)

    name = None
    if eventName == "DOMAIN_NAME":
        name = self.sf.domainKeyword(eventData, self.opts['_internettlds'])
    if eventName == "USERNAME":
        name = eventData
    if eventName == "SOCIAL_MEDIA":
        name = eventData.split(": ")[1]

    if not name:
        # Nothing searchable could be derived from this event.
        return None

    self.sf.debug("Looking at " + name)
    failed = False

    # Get all the repositories based on direct matches with the
    # name identified
    url = "https://api.github.com/search/repositories?q=" + name
    res = self.sf.fetchUrl(url, timeout=self.opts['_fetchtimeout'],
                           useragent="SpiderFoot")
    if res['content'] is None:
        self.sf.error("Unable to fetch " + url, False)
        failed = True

    if not failed:
        try:
            ret = json.loads(res['content'])
        except BaseException:
            ret = None

        if ret is None:
            self.sf.error("Unable to process empty response from Github for: "
                          + name, False)
            failed = True

    if not failed:
        if ret['total_count'] == "0" or len(ret['items']) == 0:
            self.sf.debug("No Github information for " + name)
            failed = True

    if not failed:
        for item in ret['items']:
            repo_info = self.buildRepoInfo(item)
            if repo_info is not None:
                if self.opts['namesonly'] and name not in item['name']:
                    continue
                evt = SpiderFootEvent("PUBLIC_CODE_REPO", repo_info,
                                      self.__name__, event)
                self.notifyListeners(evt)

    # Now look for users matching the name found
    failed = False
    url = "https://api.github.com/search/users?q=" + name
    res = self.sf.fetchUrl(url, timeout=self.opts['_fetchtimeout'],
                           useragent="SpiderFoot")
    if res['content'] is None:
        self.sf.error("Unable to fetch " + url, False)
        failed = True

    if not failed:
        try:
            ret = json.loads(res['content'])
        except BaseException:
            ret = None

        if ret is None:
            self.sf.error("Unable to process empty response from Github for: "
                          + name, False)
            failed = True

    if not failed:
        if ret['total_count'] == "0" or len(ret['items']) == 0:
            self.sf.debug("No Github information for " + name)
            failed = True

    if not failed:
        # For each user matching the name, get their repos
        for item in ret['items']:
            if item['repos_url'] is None:
                self.sf.debug("Incomplete Github information found (repos_url).")
                continue

            url = item['repos_url']
            res = self.sf.fetchUrl(url, timeout=self.opts['_fetchtimeout'],
                                   useragent="SpiderFoot")
            if res['content'] is None:
                self.sf.error("Unable to fetch " + url, False)
                continue

            try:
                repret = json.loads(res['content'])
            except BaseException:
                repret = None

            if repret is None:
                self.sf.error("Unable to process empty response from Github for: "
                              + name, False)
                continue

            for item in repret:
                repo_info = self.buildRepoInfo(item)
                if repo_info is not None:
                    if self.opts['namesonly'] and name not in item['name']:
                        continue
                    evt = SpiderFootEvent("PUBLIC_CODE_REPO", repo_info,
                                          self.__name__, event)
                    self.notifyListeners(evt)
def handleEvent(self, event):
    """Check the event data (IP, domain, ASN or netblock) against every
    enabled blacklist in `malchecks` and emit a MALICIOUS_* event for each
    list reporting a hit.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    if eventData in self.results:
        self.sf.debug("Skipping " + eventData + ", already checked.")
        return None
    else:
        self.results[eventData] = True

    # Honour the per-category opt-outs.
    if eventName == 'CO_HOSTED_SITE' and not self.opts.get('checkcohosts', False):
        return None
    if eventName == 'AFFILIATE_IPADDR' \
            and not self.opts.get('checkaffiliates', False):
        return None
    if eventName == 'NETBLOCK_OWNER' and not self.opts.get('checknetblocks', False):
        return None
    if eventName == 'NETBLOCK_MEMBER' and not self.opts.get('checksubnets', False):
        return None

    for check in list(malchecks.keys()):
        cid = malchecks[check]['id']
        # If the module is enabled..
        if self.opts[cid]:
            # Map the incoming event type to the lookup type ('ip', 'asn',
            # 'domain', 'netblock') and the outgoing malicious event type.
            if eventName in ['IP_ADDRESS', 'AFFILIATE_IPADDR']:
                typeId = 'ip'
                if eventName == 'IP_ADDRESS':
                    evtType = 'MALICIOUS_IPADDR'
                else:
                    evtType = 'MALICIOUS_AFFILIATE_IPADDR'

            if eventName in ['BGP_AS_OWNER', 'BGP_AS_MEMBER']:
                typeId = 'asn'
                evtType = 'MALICIOUS_ASN'

            if eventName in [
                'INTERNET_NAME',
                'CO_HOSTED_SITE',
                'AFFILIATE_INTERNET_NAME',
            ]:
                typeId = 'domain'
                if eventName == "INTERNET_NAME":
                    evtType = "MALICIOUS_INTERNET_NAME"
                if eventName == 'AFFILIATE_INTERNET_NAME':
                    evtType = 'MALICIOUS_AFFILIATE_INTERNET_NAME'
                if eventName == 'CO_HOSTED_SITE':
                    evtType = 'MALICIOUS_COHOST'

            if eventName == 'NETBLOCK_OWNER':
                typeId = 'netblock'
                evtType = 'MALICIOUS_NETBLOCK'
            if eventName == 'NETBLOCK_MEMBER':
                typeId = 'netblock'
                evtType = 'MALICIOUS_SUBNET'

            # NOTE(review): typeId/evtType are only set for the event types
            # above — presumably watchedEvents() guarantees one matches;
            # confirm, otherwise this raises NameError.
            url = self.lookupItem(cid, typeId, eventData)
            if self.checkForStop():
                return None

            # Notify other modules of what you've found
            if url is not None:
                text = check + " [" + eventData + "]\n" + "<SFURL>" + url + "</SFURL>"
                evt = SpiderFootEvent(evtType, text, self.__name__, event)
                self.notifyListeners(evt)

    return None
def handleEvent(self, event):
    """Parse JSON-encoded HTTP headers for in-target sites and emit
    WEBSERVER_BANNER / WEBSERVER_TECHNOLOGY events based on the Server,
    X-Powered-By, Set-Cookie and X-AspNet-Version headers.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data
    # The source event holds the URL these headers were fetched from.
    parentEvent = event.sourceEvent
    eventSource = event.sourceEvent.data

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)
    if eventSource in self.results:
        return None
    else:
        self.results[eventSource] = True

    if not self.getTarget().matches(self.sf.urlFQDN(eventSource)):
        self.sf.debug(
            "Not collecting web server information for external sites.")
        return None

    try:
        # eventData is expected to be a JSON object of header name -> value.
        jdata = json.loads(eventData)
        if jdata == None:
            return None
    except BaseException as e:
        self.sf.error(
            "Received HTTP headers from another module in an unexpected format.",
            False)
        return None

    # Could apply some smarts here, for instance looking for certain
    # banners and therefore classifying them further (type and version,
    # possibly OS. This could also trigger additional tests, such as 404s
    # and other errors to see what the header looks like.
    if 'server' in jdata:
        evt = SpiderFootEvent("WEBSERVER_BANNER", jdata['server'],
                              self.__name__, parentEvent)
        self.notifyListeners(evt)
        self.sf.info("Found web server: " + jdata['server'] + " ("
                     + eventSource + ")")

    if 'x-powered-by' in jdata:
        evt = SpiderFootEvent("WEBSERVER_TECHNOLOGY", jdata['x-powered-by'],
                              self.__name__, parentEvent)
        self.notifyListeners(evt)
        # NOTE(review): this early return skips the cookie/extension-based
        # technology detection below whenever X-Powered-By is present —
        # confirm that is intentional.
        return None

    tech = None
    if 'set-cookie' in jdata and 'PHPSESS' in jdata['set-cookie']:
        tech = "PHP"

    if 'set-cookie' in jdata and 'JSESSIONID' in jdata['set-cookie']:
        tech = "Java/JSP"

    if 'set-cookie' in jdata and 'ASP.NET' in jdata['set-cookie']:
        tech = "ASP.NET"

    if 'x-aspnet-version' in jdata:
        tech = "ASP.NET"

    # NOTE(review): these only fire when a cookie/header already set `tech`,
    # letting the URL extension override it — confirm `is not None` (rather
    # than `is None`) is the intended condition.
    if tech is not None and '.jsp' in eventSource:
        tech = "Java/JSP"

    if tech is not None and '.php' in eventSource:
        tech = "PHP"

    if tech is not None:
        evt = SpiderFootEvent("WEBSERVER_TECHNOLOGY", tech, self.__name__,
                              parentEvent)
        self.notifyListeners(evt)
def getIssuer(self, cert, sevt):
    """Extract the issuer text from an SSL certificate and emit it as an
    SSL_CERTIFICATE_ISSUER event linked to the given source event."""
    issuer_text = cert.get_issuer().as_text()
    self.notifyListeners(
        SpiderFootEvent(
            "SSL_CERTIFICATE_ISSUER",
            issuer_text.encode('raw_unicode_escape'),
            self.__name__,
            sevt,
        )
    )
def handleEvent(self, event):
    """Handle SOCIAL_MEDIA, DOMAIN_NAME and USERNAME events by querying the
    GitHub API for profile details (name/location) and public repositories,
    emitting RAW_RIR_DATA, GEOINFO and PUBLIC_CODE_REPO events.

    BUGFIX: the "Possible full name" RAW_RIR_DATA event was built from the
    plain string "Possible full name: {full_name}" — the f-prefix was
    missing, so the literal placeholder was emitted instead of the name.
    """
    eventName = event.eventType
    eventData = event.data

    if eventData in self.results:
        self.sf.debug(f"Already did a search for {eventData}, skipping.")
        return None

    self.results[eventData] = True

    # Extract name and location from profile
    if eventName == "SOCIAL_MEDIA":
        try:
            network = eventData.split(": ")[0]
            url = eventData.split(": ")[1].replace("<SFURL>", "").replace("</SFURL>", "")
        except BaseException as e:
            self.sf.error(f"Unable to parse SOCIAL_MEDIA: {eventData} ({e})", False)
            return None

        if not network == "Github":
            self.sf.debug(f"Skipping social network profile, {url}, as not a GitHub profile")
            return None

        # The username is the last path component of the profile URL.
        try:
            urlParts = url.split("/")
            username = urlParts[len(urlParts) - 1]
        except BaseException:
            self.sf.debug(f"Couldn't get a username out of {url}")
            return None

        res = self.sf.fetchUrl(
            f"https://api.github.com/users/{username}",
            timeout=self.opts['_fetchtimeout'],
            useragent=self.opts['_useragent']
        )
        if res['content'] is None:
            return None

        try:
            json_data = json.loads(res['content'])
        except BaseException as e:
            self.sf.debug(f"Error processing JSON response: {e}")
            return None

        if not json_data.get('login'):
            self.sf.debug(f"{username} is not a valid GitHub profile")
            return None

        full_name = json_data.get('name')
        if not full_name:
            self.sf.debug(f"{username} is not a valid GitHub profile")
            return None

        # BUGFIX: f-string prefix was missing here.
        e = SpiderFootEvent("RAW_RIR_DATA", f"Possible full name: {full_name}",
                            self.__name__, event)
        self.notifyListeners(e)

        location = json_data.get('location')
        if location is None:
            return None

        # Sanity-check the free-text location field before emitting GEOINFO.
        if len(location) < 3 or len(location) > 100:
            self.sf.debug(f"Skipping likely invalid location: {location}")
            return None

        e = SpiderFootEvent("GEOINFO", location, self.__name__, event)
        self.notifyListeners(e)
        return None

    if eventName == "DOMAIN_NAME":
        username = self.sf.domainKeyword(eventData, self.opts['_internettlds'])
        if not username:
            return None
    if eventName == "USERNAME":
        username = eventData

    self.sf.debug(f"Looking at {username}")
    failed = False

    # Get all the repositories based on direct matches with the
    # name identified
    url = f"https://api.github.com/search/repositories?q={username}"
    res = self.sf.fetchUrl(
        url,
        timeout=self.opts['_fetchtimeout'],
        useragent=self.opts['_useragent']
    )
    if res['content'] is None:
        self.sf.error(f"Unable to fetch {url}", False)
        failed = True

    if not failed:
        try:
            ret = json.loads(res['content'])
        except BaseException as e:
            self.sf.debug(f"Error processing JSON response from GitHub: {e}")
            ret = None

        if ret is None:
            self.sf.error(f"Unable to process empty response from Github for: {username}", False)
            failed = True

    if not failed:
        if ret.get('total_count', "0") == "0" or len(ret['items']) == 0:
            self.sf.debug(f"No Github information for {username}")
            failed = True

    if not failed:
        for item in ret['items']:
            repo_info = self.buildRepoInfo(item)
            if repo_info is not None:
                if self.opts['namesonly'] and username != item['name']:
                    continue
                evt = SpiderFootEvent("PUBLIC_CODE_REPO", repo_info,
                                      self.__name__, event)
                self.notifyListeners(evt)

    # Now look for users matching the name found
    failed = False
    url = f"https://api.github.com/search/users?q={username}"
    res = self.sf.fetchUrl(
        url,
        timeout=self.opts['_fetchtimeout'],
        useragent=self.opts['_useragent']
    )
    if res['content'] is None:
        self.sf.error(f"Unable to fetch {url}", False)
        failed = True

    if not failed:
        try:
            ret = json.loads(res['content'])
            if ret is None:
                self.sf.error(f"Unable to process empty response from Github for: {username}", False)
                failed = True
        except BaseException:
            self.sf.error(f"Unable to process invalid response from Github for: {username}", False)
            failed = True

    if not failed:
        if ret.get('total_count', "0") == "0" or len(ret['items']) == 0:
            self.sf.debug("No Github information for " + username)
            failed = True

    if not failed:
        # For each user matching the username, get their repos
        for item in ret['items']:
            if item.get('repos_url') is None:
                self.sf.debug("Incomplete Github information found (repos_url).")
                continue

            url = item['repos_url']
            res = self.sf.fetchUrl(url, timeout=self.opts['_fetchtimeout'],
                                   useragent=self.opts['_useragent'])
            if res['content'] is None:
                self.sf.error(f"Unable to fetch {url}", False)
                continue

            try:
                repret = json.loads(res['content'])
            except BaseException as e:
                self.sf.error(f"Invalid JSON returned from Github: {e}", False)
                continue

            if repret is None:
                self.sf.error(f"Unable to process empty response from Github for: {username}", False)
                continue

            for item in repret:
                if type(item) != dict:
                    self.sf.debug("Encountered an unexpected or empty response from Github.")
                    continue
                repo_info = self.buildRepoInfo(item)
                if repo_info is not None:
                    if self.opts['namesonly'] and item['name'] != username:
                        continue
                    if eventName == "USERNAME" and "/" + username + "/" not in item.get('html_url', ''):
                        continue
                    evt = SpiderFootEvent("PUBLIC_CODE_REPO", repo_info,
                                          self.__name__, event)
                    self.notifyListeners(evt)
def handleEvent(self, event):
    """Check an IP address against each configured DNS blacklist and emit
    BLACKLISTED_IPADDR / BLACKLISTED_AFFILIATE_IPADDR events for hits.

    BUGFIXES:
    - self.results.has_key() was removed in Python 3; replaced with `in`.
    - In the non-list branch the membership test used self.checks.keys()
      instead of self.checks[domain].keys(), which disagreed with the
      sibling branch and could raise KeyError on the subsequent lookup.
    - Bare `sf.debug(...)` calls changed to `self.sf.debug(...)` for
      consistency with every other module in this file.
    - `lookup` is pre-initialised so the except handler cannot hit a
      NameError when reverseAddr() itself raises.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data
    parentEvent = event

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    if eventData in self.results:
        return None
    self.results[eventData] = True

    for domain in self.checks:
        lookup = domain  # fallback for the error message below
        try:
            # DNSBLs are queried as <reversed-ip>.<blacklist-domain>.
            lookup = self.reverseAddr(eventData) + "." + domain
            self.sf.debug("Checking Blacklist: " + lookup)
            addrs = socket.gethostbyname_ex(lookup)
            self.sf.debug("Addresses returned: " + str(addrs))

            text = None
            for addr in addrs:
                if type(addr) == list:
                    for a in addr:
                        # A plain string check means any response is a hit;
                        # a dict maps specific return codes to descriptions.
                        if type(self.checks[domain]) is str:
                            text = self.checks[domain]
                            break
                        else:
                            if str(a) not in self.checks[domain].keys():
                                self.sf.debug("Return code not found in list: " + str(a))
                                continue
                            k = str(a)
                            text = self.checks[domain][k]
                            break
                else:
                    if type(self.checks[domain]) is str:
                        text = self.checks[domain]
                        break
                    else:
                        # Consistent with the list branch above.
                        if str(addr) not in self.checks[domain].keys():
                            self.sf.debug("Return code not found in list: " + str(addr))
                            continue
                        k = str(addr)
                        text = self.checks[domain][k]
                        break

            if text is not None:
                if eventName == "AFFILIATE_IPADDR":
                    evt = SpiderFootEvent('BLACKLISTED_AFFILIATE_IPADDR', text,
                                          self.__name__, parentEvent)
                    self.notifyListeners(evt)
                else:
                    evt = SpiderFootEvent('BLACKLISTED_IPADDR', text,
                                          self.__name__, parentEvent)
                    self.notifyListeners(evt)
        except BaseException as e:
            # An NXDOMAIN here simply means "not listed" on this blacklist.
            self.sf.debug("Unable to resolve " + eventData + " / " + lookup + ": " + str(e))

    return None
def handleEvent(self, event):
    """Query Greynoise for an IP address or netblock and emit GEOINFO,
    BGP_AS_MEMBER, COMPANY_NAME, OPERATING_SYSTEM, RAW_RIR_DATA and
    MALICIOUS_* events from matching records. Requires the 'api_key'
    option; sets errorState if it is missing.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    if self.errorState:
        return None

    self.sf.debug(f"Received event, {eventName}, from {srcModuleName}")

    if self.opts['api_key'] == "":
        self.sf.error(
            "You enabled sfp_greynoise but did not set an API key!", False)
        self.errorState = True
        return None

    # Don't look up stuff twice
    if eventData in self.results:
        self.sf.debug(f"Skipping {eventData}, already checked.")
        return None
    else:
        self.results[eventData] = True

    # A SMALLER prefixlen means a BIGGER network, hence the '<'.
    if eventName == 'NETBLOCK_OWNER':
        if not self.opts['netblocklookup']:
            return None
        else:
            if IPNetwork(eventData).prefixlen < self.opts['maxnetblock']:
                self.sf.debug("Network size bigger than permitted: "
                              + str(IPNetwork(eventData).prefixlen) + " > "
                              + str(self.opts['maxnetblock']))
                return None

    if eventName == 'NETBLOCK_MEMBER':
        if not self.opts['subnetlookup']:
            return None
        else:
            if IPNetwork(eventData).prefixlen < self.opts['maxsubnet']:
                self.sf.debug("Network size bigger than permitted: "
                              + str(IPNetwork(eventData).prefixlen) + " > "
                              + str(self.opts['maxsubnet']))
                return None

    # NOTE(review): evtType is only assigned for IP/netblock/affiliate-IP
    # events — presumably watchedEvents() restricts inputs to those types;
    # confirm, otherwise its use below raises NameError.
    if eventName == 'IP_ADDRESS' or eventName.startswith('NETBLOCK_'):
        evtType = 'MALICIOUS_IPADDR'
    if eventName == "AFFILIATE_IPADDR":
        evtType = 'MALICIOUS_AFFILIATE_IPADDR'

    ret = self.queryIP(eventData)

    if not ret:
        return None

    if "data" not in ret:
        return None

    if len(ret["data"]) > 0:
        for rec in ret["data"]:
            if rec.get("seen", None):
                self.sf.debug("Found threat info in Greynoise")

                # Skip records older than the configured age limit.
                lastseen = rec.get("last_seen", "1970-01-01")
                lastseen_dt = datetime.strptime(lastseen, '%Y-%m-%d')
                lastseen_ts = int(time.mktime(lastseen_dt.timetuple()))
                age_limit_ts = int(time.time()) - (86400 * self.opts['age_limit_days'])
                if self.opts['age_limit_days'] > 0 and lastseen_ts < age_limit_ts:
                    self.sf.debug("Record found but too old, skipping.")
                    return None

                # Only report meta data about the target, not affiliates
                if rec.get("metadata") and eventName == "IP_ADDRESS":
                    met = rec.get("metadata")

                    # "unknown" is Greynoise's placeholder for missing fields.
                    if met.get("country", "unknown") != "unknown":
                        loc = ""
                        if met.get("city"):
                            loc = met.get("city") + ", "
                        loc += met.get("country")
                        e = SpiderFootEvent("GEOINFO", loc, self.__name__, event)
                        self.notifyListeners(e)

                    if met.get("asn", "unknown") != "unknown":
                        asn = met.get("asn").replace("AS", "")
                        e = SpiderFootEvent("BGP_AS_MEMBER", asn, self.__name__, event)
                        self.notifyListeners(e)

                    if met.get("organization", "unknown") != "unknown":
                        e = SpiderFootEvent("COMPANY_NAME", met.get("organization"),
                                            self.__name__, event)
                        self.notifyListeners(e)

                    if met.get("os", "unknown") != "unknown":
                        e = SpiderFootEvent("OPERATING_SYSTEM", met.get("os"),
                                            self.__name__, event)
                        self.notifyListeners(e)

                # NOTE(review): reconstructed indentation places this at the
                # per-record level (emitted for every seen record) — confirm
                # it was not meant to sit inside the metadata branch above.
                e = SpiderFootEvent("RAW_RIR_DATA", str(rec), self.__name__, event)
                self.notifyListeners(e)

                if rec.get("classification"):
                    descr = "Greynoise [" + eventData + "]\n - Classification: " \
                        + rec.get("classification")
                    if rec.get("tags"):
                        descr += ", Tags: " + ", ".join(rec.get("tags"))
                    else:
                        descr += "\n - " + "Raw data: " + str(rec.get("raw_data"))
                    descr += "\n<SFURL>https://viz.greynoise.io/ip/" + eventData + "</SFURL>"
                    e = SpiderFootEvent(evtType, descr, self.__name__, event)
                    self.notifyListeners(e)
def handleEvent(self, event):
    """Classify spidered page content by matching it against the configured
    `regexps` groups, emitting one event per matched page type (or
    URL_STATIC when nothing matches) plus PROVIDER_JAVASCRIPT events for
    externally hosted scripts.
    """
    # We are only interested in the raw data from the spidering module
    # because the spidering module will always provide events with the
    # event.sourceEvent.data set to the URL of the source.
    if "sfp_spider" not in event.module:
        self.sf.debug("Ignoring web content from " + event.module)
        return None

    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data
    eventSource = event.actualSource  # URL the content was fetched from

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    # We aren't interested in describing pages that are not hosted on
    # our base domain.
    if not self.getTarget().matches(self.sf.urlFQDN(eventSource)):
        self.sf.debug("Not gathering page info for external site " + eventSource)
        return None

    # self.results maps URL -> list of matched page-type groups.
    if eventSource not in self.results:
        self.results[eventSource] = list()
    else:
        self.sf.debug(
            "Already checked this page for a page type, skipping.")
        return None

    # Check the configured regexps to determine the page type
    for regexpGrp in regexps:
        if regexpGrp in self.results[eventSource]:
            continue

        for regex in regexps[regexpGrp]:
            rx = re.compile(regex, re.IGNORECASE)
            matches = re.findall(rx, eventData)
            # Re-check membership: an earlier regex in this group may have
            # already recorded a match for this URL.
            if len(matches) > 0 and regexpGrp not in self.results[eventSource]:
                self.sf.info("Matched " + regexpGrp + " in content from " + eventSource)
                self.results[eventSource] = self.results[eventSource] + [regexpGrp]
                evt = SpiderFootEvent(regexpGrp, eventSource, self.__name__, event)
                self.notifyListeners(evt)

    # If no regexps were matched, consider this a static page
    if len(self.results[eventSource]) == 0:
        self.sf.info("Treating " + eventSource + " as URL_STATIC")
        evt = SpiderFootEvent("URL_STATIC", eventSource, self.__name__, event)
        self.notifyListeners(evt)

    # Check for externally referenced Javascript pages
    pat = re.compile("<script.*src=[\'\"]?([^\'\">]*)", re.IGNORECASE)
    matches = re.findall(pat, eventData)
    if len(matches) > 0:
        for match in matches:
            # Only absolute URLs pointing off-target count as external.
            if '://' in match and not self.getTarget().matches(self.sf.urlFQDN(match)):
                self.sf.debug("Externally hosted Javascript found at: " + match)
                evt = SpiderFootEvent("PROVIDER_JAVASCRIPT", match,
                                      self.__name__, event)
                self.notifyListeners(evt)

    return None
def handleEvent(self, event):
    """Look up IPs (or every IP in a netblock/subnet) against PhishStats and
    raise MALICIOUS_* events for hits.

    Fixes: the bare ``except:`` around ``data[0].get('ip')`` swallowed every
    exception (including KeyboardInterrupt); it now catches only the failures
    that an empty/odd API response can produce. The duplicated
    ``eventName.startswith("NETBLOCK_")`` test is merged into one branch.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    if self.errorState:
        return None

    self.sf.debug(f"Received event, {eventName}, from {srcModuleName}")

    # Don't look up stuff twice
    if eventData in self.results:
        self.sf.debug(f"Skipping {eventData}, already checked.")
        return None
    self.results[eventData] = True

    if eventName == 'NETBLOCK_OWNER':
        if not self.opts['netblocklookup']:
            return None
        if IPNetwork(eventData).prefixlen < self.opts['maxnetblock']:
            self.sf.debug("Network size bigger than permitted: " +
                          str(IPNetwork(eventData).prefixlen) + " > " +
                          str(self.opts['maxnetblock']))
            return None

    if eventName == 'NETBLOCK_MEMBER':
        if not self.opts['subnetlookup']:
            return None
        if IPNetwork(eventData).prefixlen < self.opts['maxsubnet']:
            self.sf.debug("Network size bigger than permitted: " +
                          str(IPNetwork(eventData).prefixlen) + " > " +
                          str(self.opts['maxsubnet']))
            return None

    qrylist = list()
    if eventName.startswith("NETBLOCK_"):
        # Expand the block and query every address within it.
        for ipaddr in IPNetwork(eventData):
            qrylist.append(str(ipaddr))
            self.results[str(ipaddr)] = True
    else:
        # If user has enabled affiliate checking
        if eventName == "AFFILIATE_IPADDR" and not self.opts['checkaffiliates']:
            return None
        qrylist.append(eventData)

    for addr in qrylist:
        if self.checkForStop():
            return None

        data = self.queryIPAddress(addr)
        if data is None:
            break

        try:
            maliciousIP = data[0].get('ip')
        except (IndexError, KeyError, AttributeError, TypeError):
            # Empty list or unexpectedly-shaped response: no data for this IP.
            continue

        if maliciousIP is None:
            continue

        if addr != maliciousIP:
            self.sf.error(
                "Reported address doesn't match requested, skipping", False)
            continue

        # Data is reported about the IP Address
        if eventName.startswith("NETBLOCK_"):
            # For netblocks, anchor findings to a fresh IP_ADDRESS event.
            ipEvt = SpiderFootEvent("IP_ADDRESS", addr, self.__name__, event)
            self.notifyListeners(ipEvt)
            evt = SpiderFootEvent("RAW_RIR_DATA", str(data), self.__name__, ipEvt)
            self.notifyListeners(evt)
        else:
            evt = SpiderFootEvent("RAW_RIR_DATA", str(data), self.__name__, event)
            self.notifyListeners(evt)

        maliciousIPDesc = f"Phishstats [{maliciousIP}]\n"
        maliciousIPDescHash = self.sf.hashstring(maliciousIPDesc)
        if maliciousIPDescHash in self.results:
            continue
        self.results[maliciousIPDescHash] = True

        if eventName.startswith("NETBLOCK_"):
            evt = SpiderFootEvent("MALICIOUS_IPADDR", maliciousIPDesc,
                                  self.__name__, ipEvt)
        elif eventName.startswith("AFFILIATE_"):
            evt = SpiderFootEvent("MALICIOUS_AFFILIATE_IPADDR", maliciousIPDesc,
                                  self.__name__, event)
        else:
            evt = SpiderFootEvent("MALICIOUS_IPADDR", maliciousIPDesc,
                                  self.__name__, event)
        self.notifyListeners(evt)

    return None
def sendEvent(self, source, result):
    """Emit an INTERNET_NAME event for a successfully brute-forced host."""
    self.sf.info("Found a brute-forced host: " + result)
    # Report the host
    host_evt = SpiderFootEvent("INTERNET_NAME", result, self.__name__, source)
    self.notifyListeners(host_evt)
def handleEvent(self, event):
    """Query CIRCL.LU passive SSL / passive DNS for the event data and emit
    SSL_CERTIFICATE_ISSUED and CO_HOSTED_SITE findings.

    Fixes: the two ``except BaseException`` handlers are narrowed to
    ``except Exception`` so KeyboardInterrupt/SystemExit are no longer
    swallowed while parsing API responses.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data
    ret = None

    if self.errorState:
        return None

    # Ignore messages from myself
    if srcModuleName == "sfp_circllu":
        return None

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    if self.opts['api_key_login'] == "" or self.opts['api_key_password'] == "":
        self.sf.error(
            "You enabled sfp_circllu but did not set an credentials!", False)
        self.errorState = True
        return None

    # Don't look up stuff twice
    if eventData in self.results:
        self.sf.debug("Skipping " + eventData + " as already mapped.")
        return None
    self.results[eventData] = True

    if eventName in ['IP_ADDRESS', 'NETBLOCK_OWNER']:
        # CIRCL.LU limit the maximum subnet size to 23
        # http://circl.lu/services/passive-ssl/
        if "/" in eventData:
            addr, mask = eventData.split("/")
            if int(mask) < 23:
                self.sf.debug("Network size bigger than permitted by CIRCL.LU.")
            else:
                ret = self.query(eventData, "PSSL")
                if not ret:
                    self.sf.info("No CIRCL.LU passive SSL data found for " + eventData)
        else:
            ret = self.query(eventData, "PSSL")
            if not ret:
                self.sf.info("No CIRCL.LU passive SSL data found for " + eventData)

        if ret:
            try:
                # Generate an event for the IP first, and then link the cert
                # to that event.
                j = json.loads(ret)
                for ip in j:
                    ipe = event
                    if ip != eventData:
                        ipe = SpiderFootEvent("IP_ADDRESS", ip, self.__name__, event)
                        self.notifyListeners(ipe)
                    for crt in j[ip]['subjects']:
                        # Pull the CN out of the certificate subject string.
                        r = re.findall(
                            ".*[\"\'](.+CN=([a-zA-Z0-9\-\*\.])+)[\"\'].*",
                            str(j[ip]['subjects'][crt]), re.IGNORECASE)
                        if r:
                            e = SpiderFootEvent("SSL_CERTIFICATE_ISSUED",
                                                r[0][0], self.__name__, ipe)
                            self.notifyListeners(e)
            except Exception as e:
                self.sf.error(
                    "Invalid response returned from CIRCL.LU: " + str(e), False)

    if eventName in ['IP_ADDRESS', 'INTERNET_NAME', 'DOMAIN_NAME']:
        ret = self.query(eventData, "PDNS")
        if not ret:
            self.sf.info("No CIRCL.LU passive DNS data found for " + eventData)
            return None

        # CIRCL.LU doesn't return valid JSON - it's one JSON record per line
        for line in ret.split("\n"):
            if len(line) < 2:
                continue
            try:
                rec = json.loads(line)
            except Exception as e:
                self.sf.error(
                    "Invalid response returned from CIRCL.LU: " + str(e), False)
                continue

            age_limit_ts = int(time.time()) - (86400 * self.opts['age_limit_days'])
            if self.opts['age_limit_days'] > 0 and rec['time_last'] < age_limit_ts:
                self.sf.debug("Record found but too old, skipping.")
                continue

            cohosts = list()
            if eventName == "IP_ADDRESS":
                # Record could be pointing to our IP, or from our IP
                if rec['rrtype'] == "A" and rec['rdata'] == eventData:
                    if not self.getTarget().matches(rec['rrname']):
                        # We found a co-host
                        cohosts.append(rec['rrname'])

            if eventName in ["INTERNET_NAME", "DOMAIN_NAME"]:
                # Record could be an A/CNAME of this entity, or something pointing to it
                if rec['rdata'] == eventData:
                    if not self.getTarget().matches(rec['rrname']):
                        # We found a co-host
                        cohosts.append(rec['rrname'])

            for co in cohosts:
                if eventName == "IP_ADDRESS" and (
                        self.opts['verify'] and not self.validateIP(co, eventData)):
                    self.sf.debug("Host no longer resolves to our IP.")
                    continue

                if not self.opts['cohostsamedomain']:
                    if self.getTarget().matches(co, includeParents=True):
                        self.sf.debug("Skipping " + co +
                                      " because it is on the same domain.")
                        continue

                e = SpiderFootEvent("CO_HOSTED_SITE", co, self.__name__, event)
                self.notifyListeners(e)
def handleEvent(self, event):
    """Derive candidate usernames from names/domains/e-mails and check
    account-hosting sites for their existence.

    Fixes: ``self.opts['generic'] is list()`` compared identity against a
    brand-new list and was therefore always False, so generic account names
    were never skipped; it now uses ``isinstance``. The unused local
    ``adduser`` is removed.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data
    users = list()

    if self.errorState:
        return None

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    # Skip events coming from me unless they are USERNAME events
    if eventName != "USERNAME" and srcModuleName == "sfp_accounts":
        return None

    if eventData not in list(self.results.keys()):
        self.results[eventData] = True
    else:
        return None

    # If being called for the first time, let's see how trusted the
    # sites are by attempting to fetch a garbage user.
    if not self.distrustedChecked:
        # Check if a state cache exists first, to not have to do this all the time
        content = self.sf.cacheGet("sfaccounts_state", 72)
        if content:
            delsites = list()
            for line in content.split("\n"):
                if line == '':
                    continue
                delsites.append(line)
            self.sites = [d for d in self.sites if d['name'] not in delsites]
        else:
            # Probe every site with a random username; sites that "find" it
            # are producing false positives and get distrusted.
            randpool = 'abcdefghijklmnopqrstuvwxyz1234567890'
            randuser = ''.join(
                [random.SystemRandom().choice(randpool) for x in range(10)])
            res = self.batchSites(randuser)
            if len(res) > 0:
                delsites = list()
                for site in res:
                    sitename = site.split(" (Category:")[0]
                    self.sf.debug("Distrusting " + sitename)
                    delsites.append(sitename)
                self.sites = [d for d in self.sites if d['name'] not in delsites]
                self.sf.cachePut("sfaccounts_state", delsites)
        self.distrustedChecked = True

    # Build the list of candidate usernames from the event type.
    if eventName == "HUMAN_NAME":
        names = [
            eventData.lower().replace(" ", ""),
            eventData.lower().replace(" ", ".")
        ]
        for name in names:
            users.append(name)

    if eventName == "DOMAIN_NAME":
        kw = self.sf.domainKeyword(eventData, self.opts['_internettlds'])
        users.append(kw)

    if eventName == "EMAILADDR":
        name = eventData.split("@")[0].lower()
        users.append(name)

    if eventName == "USERNAME":
        users.append(eventData)

    for user in users:
        if isinstance(self.opts['generic'], list) and user in self.opts['generic']:
            self.sf.debug(user + " is a generic account name, skipping.")
            continue

        if self.opts['ignorenamedict'] and user in self.commonNames:
            self.sf.debug(user + " is found in our name dictionary, skipping.")
            continue

        if self.opts['ignoreworddict'] and user in self.words:
            self.sf.debug(user + " is found in our word dictionary, skipping.")
            continue

        res = self.batchSites(user)
        for site in res:
            evt = SpiderFootEvent("ACCOUNT_EXTERNAL_OWNED", site,
                                  self.__name__, event)
            self.notifyListeners(evt)

        # Report derived usernames once, but never echo the input back.
        if user not in self.reportedUsers and eventData != user:
            evt = SpiderFootEvent("USERNAME", user, self.__name__, event)
            self.notifyListeners(evt)
            self.reportedUsers.append(user)
def handleEvent(self, event):
    """Extract probable human names from content via a dictionary-scored heuristic,
    and convert dotted e-mail local-parts into HUMAN_NAME events.

    NOTE(review): uses the Python 2 ``unicode`` builtin — this block predates a
    Python 3 port; verify against the file's target interpreter.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    # If the source event is web content, check if the source URL was javascript
    # or CSS, in which case optionally ignore it.
    if eventName == "TARGET_WEB_CONTENT":
        url = event.sourceEvent.data
        if self.opts['filterjscss'] and (".js" in url or ".css" in url):
            self.sf.debug("Ignoring web content from CSS/JS.")
            return None

    # "first.last@example.com" -> "First Last" when enabled.
    if eventName == "EMAILADDR" and self.opts['emailtoname']:
        if "." in eventData.split("@")[0]:
            if type(eventData) == unicode:
                name = " ".join(map(unicode.capitalize, eventData.split("@")[0].split(".")))
            else:
                name = " ".join(map(str.capitalize, eventData.split("@")[0].split(".")))
                name = unicode(name, 'utf-8', errors='replace')
            # Notify other modules of what you've found
            evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
            if event.moduleDataSource:
                evt.moduleDataSource = event.moduleDataSource
            else:
                evt.moduleDataSource = "Unknown"
            self.notifyListeners(evt)
        # E-mail addresses never go through the regex scoring stage below.
        return None

    # Stage 1: Find things that look (very vaguely) like names:
    # two adjacent capitalised words, optionally separated by an initial.
    # NOTE(review): the character classes contain mojibake (�) — these were
    # presumably accented Latin letters before an encoding corruption; verify
    # against the module's upstream source.
    rx = re.compile("([A-Z][a-z�������������]+)\s+.?.?\s?([A-Z][�������������a-zA-Z\'\-]+)")
    m = re.findall(rx, eventData)
    for r in m:
        # Start off each match as 0 points.
        p = 0
        notindict = False

        # Shouldn't encounter "Firstname's Secondname"
        first = r[0].lower()
        if first[len(first) - 2] == "'" or first[len(first) - 1] == "'":
            continue

        # Strip off trailing ' or 's
        secondOrig = r[1].replace("'s", "")
        secondOrig = secondOrig.rstrip("'")
        second = r[1].lower().replace("'s", "")
        second = second.rstrip("'")

        # If both words are not in the dictionary, add 75 points.
        if first not in self.d and second not in self.d:
            self.sf.debug("Both first and second names are not in the dictionary, so high chance of name: (" + first +":" + second +").")
            p += 75
            notindict = True
        else:
            self.sf.debug(first + " was found or " + second + " was found in dictionary.")

        # If the first word is a known popular first name, award 50 points.
        if first in self.n:
            p += 50

        # If either word is 2 characters, subtract 50 points.
        if len(first) == 2 or len(second) == 2:
            p -= 50

        if not notindict:
            # If the first word is in the dictionary but the second isn't,
            # subtract 20 points.
            if first in self.d and second not in self.d:
                p -= 20
            # If the second word is in the dictionary but the first isn't,
            # subtract 40 points.
            if first not in self.d and second in self.d:
                p -= 40

        name = r[0] + " " + secondOrig

        self.sf.debug("Name of " + name + " has score: " + str(p))
        # Only report candidates scoring above the configured threshold.
        if p > self.opts['algolimit']:
            # Notify other modules of what you've found
            evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
            if event.moduleDataSource:
                evt.moduleDataSource = event.moduleDataSource
            else:
                evt.moduleDataSource = "Unknown"
            self.notifyListeners(evt)
def handleEvent(self, event):
    """Extract probable human names from content via a dictionary-scored heuristic
    (variant without the CSS/JS source filter; threshold option is 'algotune').

    NOTE(review): uses the Python 2 ``unicode`` builtin — verify against the
    file's target interpreter.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    # "first.last@example.com" -> "First Last" when enabled.
    if eventName == "EMAILADDR" and self.opts['emailtoname']:
        if "." in eventData.split("@")[0]:
            if type(eventData) == unicode:
                name = " ".join(map(unicode.capitalize, eventData.split("@")[0].split(".")))
            else:
                name = " ".join(map(str.capitalize, eventData.split("@")[0].split(".")))
                name = unicode(name, 'utf-8', errors='replace')
            # Notify other modules of what you've found
            evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
            if event.moduleDataSource:
                evt.moduleDataSource = event.moduleDataSource
            else:
                evt.moduleDataSource = "Unknown"
            self.notifyListeners(evt)
        # E-mail addresses never go through the regex scoring stage below.
        return None

    # Stage 1: Find things that look (very vaguely) like names:
    # two adjacent capitalised words, optionally separated by an initial.
    # NOTE(review): the character classes contain mojibake (�) — these were
    # presumably accented Latin letters before an encoding corruption; verify
    # against the module's upstream source.
    rx = re.compile("([A-Z][a-z�������������]+)\s+.?.?\s?([A-Z][�������������a-zA-Z\'\-]+)")
    m = re.findall(rx, eventData)
    for r in m:
        # Start off each match as 0 points.
        p = 0
        notindict = False

        # Shouldn't encounter "Firstname's Secondname"
        first = r[0].lower()
        if first[len(first) - 2] == "'" or first[len(first) - 1] == "'":
            continue

        # Strip off trailing ' or 's
        secondOrig = r[1].replace("'s", "")
        secondOrig = secondOrig.rstrip("'")
        second = r[1].lower().replace("'s", "")
        second = second.rstrip("'")

        # If both words are not in the dictionary, add 75 points.
        if first not in self.d and second not in self.d:
            p += 75
            notindict = True

        # If the first word is a known popular first name, award 50 points.
        if first in self.n:
            p += 50

        # If either word is 2 characters, subtract 50 points.
        if len(first) == 2 or len(second) == 2:
            p -= 50

        if not notindict:
            # If the first word is in the dictionary but the second isn't,
            # subtract 20 points.
            if first in self.d and second not in self.d:
                p -= 20
            # If the second word is in the dictionary but the first isn't,
            # subtract 40 points.
            if first not in self.d and second in self.d:
                p -= 40

        name = r[0] + " " + secondOrig
        # Only report candidates scoring above the configured threshold.
        if p > self.opts['algotune']:
            # Notify other modules of what you've found
            evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
            if event.moduleDataSource:
                evt.moduleDataSource = event.moduleDataSource
            else:
                evt.moduleDataSource = "Unknown"
            self.notifyListeners(evt)
def handleEvent(self, event):
    """Query the NetworksDB API for IP info/geo/reverse-DNS (for IP events)
    or forward-DNS (for name events) and emit the corresponding events.
    Requires an API key; sets self.errorState when it is missing.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    if self.errorState:
        return None

    # Don't look up the same value twice.
    if eventData in self.results:
        return None

    if self.opts['api_key'] == '':
        self.sf.error(
            "You enabled sfp_networksdb but did not set an API key!", False)
        self.errorState = True
        return None

    self.results[eventData] = True

    self.sf.debug("Received event, %s, from %s" % (eventName, srcModuleName))

    if eventName in ["IP_ADDRESS", "IPV6_ADDRESS"]:
        # 1) Basic IP info: raw data plus the containing netblock, if any.
        data = self.queryIpInfo(eventData)
        if data is None:
            self.sf.debug("No IP address information found for " + eventData)
        else:
            evt = SpiderFootEvent('RAW_RIR_DATA', str(data), self.__name__, event)
            self.notifyListeners(evt)
            network = data.get('network')
            if network:
                cidr = network.get('cidr')
                if cidr and cidr != 'N/A':
                    evt = SpiderFootEvent('NETBLOCK_MEMBER', cidr, self.__name__, event)
                    self.notifyListeners(evt)

        # 2) Geolocation: "city, state, country" (empty parts dropped).
        data = self.queryIpGeo(eventData)
        if data is None:
            self.sf.debug("No IP geolocation information found for " + eventData)
        else:
            evt = SpiderFootEvent('RAW_RIR_DATA', str(data), self.__name__, event)
            self.notifyListeners(evt)
            if data.get('country'):
                location = ', '.join(
                    filter(None, [
                        data.get('city'),
                        data.get('state'),
                        data.get('country')
                    ]))
                evt = SpiderFootEvent('GEOINFO', location, self.__name__, event)
                self.notifyListeners(evt)

        # 3) Reverse DNS: collect domains sharing this IP.
        data = self.queryReverseDns(eventData)
        cohosts = list()
        if data is None:
            self.sf.debug("No reverse DNS results for " + eventData)
        else:
            evt = SpiderFootEvent('RAW_RIR_DATA', str(data), self.__name__, event)
            self.notifyListeners(evt)
            results = data.get('results')
            if results:
                for domain in results:
                    cohosts.append(domain)

        for co in set(cohosts):
            if self.checkForStop():
                return None
            if co in self.results:
                continue
            # Optionally confirm the host still resolves to this IP.
            if self.opts['verify'] and not self.sf.validateIP(co, eventData):
                self.sf.debug("Host " + co + " no longer resolves to " + eventData)
                continue
            if not self.opts['cohostsamedomain']:
                # A co-host on the target's own domain is reported as an
                # internet name (and domain, where applicable), not a co-host.
                if self.getTarget().matches(co, includeParents=True):
                    evt = SpiderFootEvent('INTERNET_NAME', co, self.__name__, event)
                    self.notifyListeners(evt)
                    if self.sf.isDomain(co, self.opts['_internettlds']):
                        evt = SpiderFootEvent('DOMAIN_NAME', co, self.__name__, event)
                        self.notifyListeners(evt)
                    continue
            # Cap the number of co-hosted sites reported per scan.
            if self.cohostcount < self.opts['maxcohost']:
                evt = SpiderFootEvent('CO_HOSTED_SITE', co, self.__name__, event)
                self.notifyListeners(evt)
                self.cohostcount += 1

    if eventName in ["INTERNET_NAME", "DOMAIN_NAME"]:
        # Forward DNS: report each resolved address as IPv4 or IPv6.
        data = self.queryForwardDns(eventData)
        if data is None:
            self.sf.debug("No forward DNS results for " + eventData)
            return None
        res = data.get('results')
        if not res:
            self.sf.debug("No forward DNS results for " + eventData)
            return None
        evt = SpiderFootEvent('RAW_RIR_DATA', str(res), self.__name__, event)
        self.notifyListeners(evt)
        for ip in res:
            if self.sf.validIP(ip):
                evt = SpiderFootEvent('IP_ADDRESS', ip, self.__name__, event)
                self.notifyListeners(evt)
            elif self.sf.validIP6(ip):
                evt = SpiderFootEvent('IPV6_ADDRESS', ip, self.__name__, event)
                self.notifyListeners(evt)
def handleEvent(self, event):
    """Extract phone numbers from content-type events and, for PHONE_NUMBER
    events, look up the carrier name.

    Fixes: the two ``except BaseException`` handlers are narrowed —
    ``phonenumbers.parse`` raises NumberParseException, and the carrier lookup
    now catches ``Exception`` so KeyboardInterrupt/SystemExit propagate.
    The long-dead commented-out geocoder block is removed.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    # De-duplicate on a hash of the raw content.
    sourceData = self.sf.hashstring(eventData)
    if sourceData in self.results:
        return None
    self.results[sourceData] = True

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    if eventName in ['TARGET_WEB_CONTENT', 'DOMAIN_WHOIS', 'NETBLOCK_WHOIS']:
        # Make potential phone numbers more friendly to parse
        content = eventData.replace('.', '-')

        for match in phonenumbers.PhoneNumberMatcher(content, region=None):
            n = phonenumbers.format_number(
                match.number, phonenumbers.PhoneNumberFormat.E164)
            evt = SpiderFootEvent("PHONE_NUMBER", n, self.__name__, event)
            if event.moduleDataSource:
                evt.moduleDataSource = event.moduleDataSource
            else:
                evt.moduleDataSource = "Unknown"
            self.notifyListeners(evt)

    if eventName == 'PHONE_NUMBER':
        try:
            number = phonenumbers.parse(eventData)
        except phonenumbers.NumberParseException as e:
            self.sf.debug('Error parsing phone number: ' + str(e))
            return None

        try:
            number_carrier = carrier.name_for_number(number, 'en')
        except Exception as e:
            self.sf.debug('Error retrieving phone number carrier: ' + str(e))
            return None

        if number_carrier:
            evt = SpiderFootEvent("PROVIDER_TELCO", number_carrier,
                                  self.__name__, event)
            self.notifyListeners(evt)
        else:
            self.sf.debug("No carrier information found for " + eventData)

    return None
def handleEvent(self, event):
    """Fetch an external site and check whether it mentions the scan target;
    if so, report it as an affiliate along with its content."""
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    self.sf.debug(f"Received event, {eventName}, from {srcModuleName}")

    # The SIMILARDOMAIN and CO_HOSTED_SITE events supply domains,
    # not URLs. Assume HTTP.
    if eventName in ['SIMILARDOMAIN', 'CO_HOSTED_SITE']:
        eventData = 'http://' + eventData.lower()

    # We are only interested in external sites for the crossref
    if self.getTarget().matches(self.sf.urlFQDN(eventData)):
        self.sf.debug("Ignoring " + eventData + " as not external")
        return None

    if eventData in self.fetched:
        self.sf.debug("Ignoring " + eventData + " as already tested")
        return
    self.fetched[eventData] = True

    self.sf.debug("Testing for affiliation: " + eventData)
    page = self.sf.fetchUrl(eventData,
                            timeout=self.opts['_fetchtimeout'],
                            useragent=self.opts['_useragent'],
                            sizeLimit=10000000,
                            verify=False)

    if page['content'] is None:
        self.sf.debug("Ignoring " + eventData + " as no data returned")
        return None

    # Search for mentions of our host/domain in the external site's data.
    found = False
    for target_name in self.getTarget().getNames():
        mention_rx = re.compile(
            "([\.\'\/\"\ ]" + target_name + "[\.\'\/\"\ ])", re.IGNORECASE)
        if len(re.findall(mention_rx, page['content'])) > 0:
            found = True
            affiliate_url = eventData
            break

    # If the name wasn't found in the affiliate, and checkbase is set,
    # fetch the base URL of the affiliate to check for a crossref.
    if not found and eventName == "LINKED_URL_EXTERNAL" and self.opts['checkbase']:
        # Check the base url to see if there is an affiliation
        affiliate_url = self.sf.urlBaseUrl(eventData)
        if affiliate_url in self.fetched:
            return None
        self.fetched[affiliate_url] = True

        page = self.sf.fetchUrl(affiliate_url,
                                timeout=self.opts['_fetchtimeout'],
                                useragent=self.opts['_useragent'],
                                sizeLimit=10000000,
                                verify=False)
        if page['content'] is not None:
            for target_name in self.getTarget().getNames():
                mention_rx = re.compile(
                    "([\.\'\/\"\ ]" + target_name + "[\'\/\"\ ])",
                    re.IGNORECASE)
                if len(re.findall(mention_rx, page['content'])) > 0:
                    found = True

    if not found:
        return

    if not event.moduleDataSource:
        event.moduleDataSource = "Unknown"

    self.sf.info("Found affiliate: " + affiliate_url)
    name_evt = SpiderFootEvent("AFFILIATE_INTERNET_NAME",
                               self.sf.urlFQDN(affiliate_url),
                               self.__name__, event)
    name_evt.moduleDataSource = event.moduleDataSource
    self.notifyListeners(name_evt)

    content_evt = SpiderFootEvent("AFFILIATE_WEB_CONTENT", page['content'],
                                  self.__name__, name_evt)
    content_evt.moduleDataSource = event.moduleDataSource
    self.notifyListeners(content_evt)