Exemple #1
0
    def handleEvent(self, event):
        """Scan the event data for phone numbers and emit one event per match.

        The event data is hashed with ``self.sf.hashstring`` and the hash is
        remembered in ``self.results`` so identical data is only scanned once.
        Every match is formatted as E.164 and sent to listeners as a
        ``PHONE_NUMBER`` event whose parent is the incoming event.

        Args:
            event: incoming event; ``eventType``, ``module``, ``data`` and
                ``moduleDataSource`` are read from it.

        Returns:
            None in all cases.
        """
        evType = event.eventType
        origin = event.module
        payload = event.data
        digest = self.sf.hashstring(payload)

        # Identical payloads (same hash) have already been scanned.
        if digest in self.results:
            return None
        self.results.append(digest)

        self.sf.debug("Received event, " + evType + ", from " + origin)

        # Dots are swapped for dashes before matching so that dotted
        # numbers are friendlier for the matcher to parse.
        text = payload.replace('.', '-')
        for found in phonenumbers.PhoneNumberMatcher(text, region=None):
            number = phonenumbers.format_number(
                found.number, phonenumbers.PhoneNumberFormat.E164)
            phoneEvt = SpiderFootEvent("PHONE_NUMBER", number, self.__name__,
                                       event)
            # Carry the original data source forward, with a fallback label.
            phoneEvt.moduleDataSource = (event.moduleDataSource
                                         if event.moduleDataSource
                                         else "Unknown")
            self.notifyListeners(phoneEvt)

        return None
Exemple #2
0
    def handleEvent(self, event):
        """Find e-mail addresses in the event data and emit them.

        Events whose type starts with ``EMAILADDR`` are ignored, as is any
        data that is not exactly ``str`` or ``unicode``. Each regex match is
        discarded if it is shorter than four characters, contains ``%``, or
        has a domain that ``self.getTarget().matches`` rejects. Remaining
        addresses are de-duplicated within this call and sent to listeners
        as ``EMAILADDR`` events.

        Args:
            event: incoming event; ``eventType``, ``module``, ``data`` and
                ``moduleDataSource`` are read from it.

        Returns:
            None in all cases.
        """
        evType = event.eventType
        origin = event.module
        payload = event.data

        if evType.startswith("EMAILADDR"):
            return None

        self.sf.debug("Received event, " + evType + ", from " + origin)

        if type(payload) not in [str, unicode]:
            self.sf.debug("Unhandled type to find e-mails: " + str(type(payload)))
            return None

        pat = re.compile("([\%a-zA-Z\.0-9_\-]+@[a-zA-Z\.0-9\-]+\.[a-zA-Z\.0-9\-]+)")
        # Addresses already reported from this one event.
        reported = list()
        for candidate in pat.findall(payload):
            if len(candidate) < 4:
                self.sf.debug("Likely invalid address: " + candidate)
                continue

            # A percent sign indicates a messed up (URL-style) encoding.
            if "%" in candidate:
                self.sf.debug("Skipped address: " + candidate)
                continue

            # Get the domain and strip any leading/trailing dots.
            domain = candidate.lower().split('@')[1].strip('.')
            if not self.getTarget().matches(domain):
                self.sf.debug("Ignoring e-mail address on an external domain: " + candidate)
                continue

            self.sf.info("Found e-mail address: " + candidate)
            trimmed = candidate.strip('.')
            if type(candidate) == str:
                # Byte strings are decoded, replacing undecodable bytes.
                mail = unicode(trimmed, 'utf-8', errors='replace')
            else:
                mail = trimmed

            if mail in reported:
                self.sf.debug("Already found from this source.")
                continue
            reported.append(mail)

            mailEvt = SpiderFootEvent("EMAILADDR", mail, self.__name__, event)
            # Carry the original data source forward, with a fallback label.
            mailEvt.moduleDataSource = (event.moduleDataSource
                                        if event.moduleDataSource
                                        else "Unknown")
            self.notifyListeners(mailEvt)

        return None
Exemple #3
0
    def handleEvent(self, event):
        """Look the event data up in SHODAN and emit events for what is found.

        Behaviour by event type:

        * ``DOMAIN_NAME``: ``self.searchHosts`` is called and its result is
          emitted as ``SEARCH_ENGINE_WEB_CONTENT``.
        * ``WEB_ANALYTICS_ID``: data of the form ``"<network>: <id>"`` is
          parsed; for the supported networks ``self.searchHtml`` is called
          and its result emitted as ``SEARCH_ENGINE_WEB_CONTENT``.
        * ``NETBLOCK_*``: every address of the network is queried
          (``NETBLOCK_OWNER`` only if ``netblocklookup`` is enabled and the
          prefix length is not below ``maxnetblock``).
        * anything else: the event data itself is queried.

        For each queried address the record returned by ``self.query`` is
        emitted as ``RAW_RIR_DATA`` followed by ``OPERATING_SYSTEM``,
        ``DEVICE_TYPE``, ``GEOINFO`` and, per entry in ``rec['data']``,
        ``TCP_PORT_OPEN``, ``TCP_PORT_OPEN_BANNER``, ``SOFTWARE_USED``,
        ``BGP_AS_MEMBER`` and ``VULNERABILITY`` events when the matching
        field is present.

        Args:
            event: incoming event; ``eventType``, ``module`` and ``data``
                are read from it.

        Returns:
            None in all cases.
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        if self.errorState:
            return None

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)

        if self.opts['api_key'] == "":
            self.sf.error("You enabled sfp_shodan but did not set an API key!",
                          False)
            self.errorState = True
            return None

        # Don't look up stuff twice
        if eventData in self.results:
            self.sf.debug("Skipping " + eventData + " as already mapped.")
            return None
        self.results[eventData] = True

        if eventName == "DOMAIN_NAME":
            hosts = self.searchHosts(eventData)
            if hosts is None:
                return None

            evt = SpiderFootEvent("SEARCH_ENGINE_WEB_CONTENT", str(hosts),
                                  self.__name__, event)
            self.notifyListeners(evt)
            # NOTE(review): there is no return here, so the domain name is
            # also passed to self.query() below — confirm this is intended.

        if eventName == 'WEB_ANALYTICS_ID':
            try:
                parts = eventData.split(": ")
                network = parts[0]
                analytics_id = parts[1]
            except Exception as e:
                self.sf.error(
                    "Unable to parse WEB_ANALYTICS_ID: " + eventData + " (" +
                    str(e) + ")", False)
                return None

            if network not in [
                    'Google AdSense', 'Google Analytics',
                    'Google Site Verification'
            ]:
                self.sf.debug("Skipping " + eventData + ", as not supported.")
                return None

            rec = self.searchHtml(analytics_id)

            if rec is None:
                return None

            evt = SpiderFootEvent("SEARCH_ENGINE_WEB_CONTENT", str(rec),
                                  self.__name__, event)
            self.notifyListeners(evt)
            return None

        if eventName == 'NETBLOCK_OWNER':
            if not self.opts['netblocklookup']:
                return None

            # A smaller prefix length means a larger network.
            prefixlen = IPNetwork(eventData).prefixlen
            if prefixlen < self.opts['maxnetblock']:
                self.sf.debug("Network size bigger than permitted: " +
                              str(prefixlen) + " > " +
                              str(self.opts['maxnetblock']))
                return None

        qrylist = list()
        if eventName.startswith("NETBLOCK_"):
            # Expand the network; each address is marked as handled so a
            # later event for the same address is skipped.
            for ipaddr in IPNetwork(eventData):
                qrylist.append(str(ipaddr))
                self.results[str(ipaddr)] = True
        else:
            qrylist.append(eventData)

        for addr in qrylist:
            rec = self.query(addr)
            if rec is None:
                continue

            evt = SpiderFootEvent("RAW_RIR_DATA", str(rec), self.__name__,
                                  event)
            self.notifyListeners(evt)

            if self.checkForStop():
                return None

            if rec.get('os') is not None:
                # Notify other modules of what you've found
                evt = SpiderFootEvent("OPERATING_SYSTEM",
                                      rec.get('os') + " (" + addr + ")",
                                      self.__name__, event)
                self.notifyListeners(evt)

            if rec.get('devtype') is not None:
                # Notify other modules of what you've found
                evt = SpiderFootEvent("DEVICE_TYPE",
                                      rec.get('devtype') + " (" + addr + ")",
                                      self.__name__, event)
                self.notifyListeners(evt)

            if rec.get('country_name') is not None:
                # City may be missing; only non-empty parts are joined.
                location = ', '.join([
                    _f for _f in [rec.get('city'),
                                  rec.get('country_name')] if _f
                ])
                evt = SpiderFootEvent("GEOINFO", location, self.__name__,
                                      event)
                self.notifyListeners(evt)

            if 'data' in rec:
                self.sf.info("Found SHODAN data for " + eventData)
                for r in rec['data']:
                    # Keep the raw value: converting to str here would turn
                    # a missing port into the string "None" and defeat the
                    # None check below.
                    port = r.get('port')
                    banner = r.get('banner')
                    asn = r.get('asn')
                    product = r.get('product')
                    vulns = r.get('vulns')

                    if port is not None:
                        # Notify other modules of what you've found
                        cp = addr + ":" + str(port)
                        evt = SpiderFootEvent("TCP_PORT_OPEN", cp,
                                              self.__name__, event)
                        self.notifyListeners(evt)

                    if banner is not None:
                        # Notify other modules of what you've found
                        evt = SpiderFootEvent("TCP_PORT_OPEN_BANNER", banner,
                                              self.__name__, event)
                        self.notifyListeners(evt)

                    if product is not None:
                        evt = SpiderFootEvent("SOFTWARE_USED", product,
                                              self.__name__, event)
                        self.notifyListeners(evt)

                    if asn is not None:
                        # The "AS" prefix is dropped from the emitted value.
                        evt = SpiderFootEvent("BGP_AS_MEMBER",
                                              asn.replace("AS", ""),
                                              self.__name__, event)
                        self.notifyListeners(evt)

                    if vulns is not None:
                        for vuln in list(vulns.keys()):
                            evt = SpiderFootEvent('VULNERABILITY', vuln,
                                                  self.__name__, event)
                            self.notifyListeners(evt)

        return None
Exemple #4
0
    def handleEvent(self, event):
        """Download a linked file and emit the meta data found inside it.

        The event data is treated as a URL. For each configured extension in
        ``self.opts['fileexts']`` that appears (case-insensitively, with a
        leading dot) in the URL, the file is fetched and parsed according to
        that extension: PDF via ``PyPDF2``, pptx/docx/xlsx via
        ``openxmllib``, jpg/jpeg/tiff via ``exifread``. Parsed meta data is
        emitted as ``RAW_FILE_META_DATA``; a producer/creator/application/
        image-software value, when present, is emitted as ``SOFTWARE_USED``.

        Any fetch or parse failure is logged and ends processing of the
        event.

        Args:
            event: incoming event; ``eventType``, ``module`` and ``data``
                are read from it.

        Returns:
            None in all cases.
        """
        evType = event.eventType
        origin = event.module
        fileUrl = event.data

        self.sf.debug("Received event, " + evType + ", from " + origin)

        # Each URL is only processed once.
        if fileUrl in self.results:
            return None
        self.results.append(fileUrl)

        for fileExt in self.opts['fileexts']:
            if self.checkForStop():
                return None

            ext = fileExt.lower()
            if "." + ext not in fileUrl.lower():
                continue

            # Fetch the file; a large size limit is used because these
            # files are typically large.
            fetched = self.sf.fetchUrl(fileUrl,
                                       timeout=self.opts['timeout'],
                                       useragent=self.opts['_useragent'],
                                       dontMangle=True,
                                       sizeLimit=10000000)
            content = fetched['content']
            if content is None:
                self.sf.error(
                    "Unable to fetch file for meta analysis: " + fileUrl,
                    False)
                return None

            if len(content) < 512:
                self.sf.error(
                    "Strange content encountered, size of " +
                    str(len(content)), False)
                return None

            meta = None
            data = None
            # Pick the parser from the file extension.
            if ext == "pdf":
                try:
                    stream = StringIO(content)
                    reader = PyPDF2.PdfFileReader(stream, strict=False)
                    data = reader.getDocumentInfo()
                    meta = str(data)
                    self.sf.debug("Obtained meta data from " + fileUrl)
                except BaseException as e:
                    self.sf.error(
                        "Unable to parse meta data from: " + fileUrl +
                        "(" + str(e) + ")", False)
                    return None
            elif ext in ["pptx", "docx", "xlsx"]:
                try:
                    mtype = mimetypes.guess_type(fileUrl)[0]
                    doc = openxmllib.openXmlDocument(data=content,
                                                     mime_type=mtype)
                    self.sf.debug("Office type: " + doc.mimeType)
                    data = doc.allProperties
                    meta = str(data)
                except ValueError as e:
                    self.sf.error(
                        "Unable to parse meta data from: " + fileUrl +
                        "(" + str(e) + ")", False)
                    return None
                except lxml.etree.XMLSyntaxError as e:
                    self.sf.error(
                        "Unable to parse XML within: " + fileUrl + "(" +
                        str(e) + ")", False)
                    return None
                except BaseException as e:
                    self.sf.error(
                        "Unable to process file: " + fileUrl + "(" +
                        str(e) + ")", False)
                    return None
            elif ext in ["jpg", "jpeg", "tiff"]:
                try:
                    stream = StringIO(content)
                    data = exifread.process_file(stream)
                    # No EXIF tags: move on to the next extension.
                    if data is None or len(data) == 0:
                        continue
                    meta = str(data)
                except BaseException as e:
                    self.sf.error(
                        "Unable to parse meta data from: " + fileUrl +
                        "(" + str(e) + ")", False)
                    return None

            # Nothing was parsed for this extension.
            if meta is None or data is None:
                continue

            rawEvt = SpiderFootEvent("RAW_FILE_META_DATA", meta,
                                     self.__name__, event)
            self.notifyListeners(rawEvt)

            # Work out which software produced the file. Later keys take
            # precedence over earlier ones.
            software = None
            try:
                hasProducer = "/Producer" in data
                if hasProducer:
                    software = data['/Producer']

                # The creator wins over the producer when the two differ,
                # or when there is no producer at all.
                if "/Creator" in data:
                    if not hasProducer or data['/Creator'] != data['/Producer']:
                        software = data['/Creator']

                if "Application" in data:
                    software = data['Application']

                if "Image Software" in data:
                    software = str(data['Image Software'])
            except BaseException as e:
                self.sf.error(
                    "Failed to parse PDF, " + fileUrl + ": " +
                    str(e), False)
                return None

            if software and not isinstance(software, PyPDF2.generic.NullObject):
                # Replace every non-ASCII character with a space.
                software = ''.join(
                    ch if ord(ch) < 128 else ' ' for ch in software)
                swEvt = SpiderFootEvent("SOFTWARE_USED", software,
                                        self.__name__, event)
                self.notifyListeners(swEvt)
    def handleEvent(self, event):
        """Search for the event data via ``self.query`` and emit what the hits contain.

        Result pages are fetched one at a time, up to ``opts['max_pages']``
        or the last page implied by the reported hit count, whichever is
        smaller. Every hit is emitted as ``RAW_RIR_DATA``. URLs and e-mail
        addresses are extracted from each hit's snippet; those matching the
        target (including children and parents) are emitted as
        ``LINKED_URL_INTERNAL`` and ``EMAILADDR`` / ``EMAILADDR_GENERIC``.
        Host names of the matching URLs are emitted afterwards as
        ``INTERNET_NAME`` (plus ``DOMAIN_NAME`` when ``self.sf.isDomain``
        says so) or ``INTERNET_NAME_UNRESOLVED``.

        Args:
            event: incoming event; ``eventType``, ``module`` and ``data``
                are read from it.

        Returns:
            None in all cases.
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        if eventData in self.results:
            return None

        self.results[eventData] = True

        self.sf.debug("Received event, %s, from %s" %
                      (eventName, srcModuleName))

        if srcModuleName == 'sfp_grep_app':
            self.sf.debug("Ignoring " + eventData + ", from self.")
            return None

        hosts = set()
        page = 1
        # presumably the number of hits the API returns per page — TODO confirm
        per_page = 10
        pages = self.opts['max_pages']
        while page <= pages:
            if self.checkForStop():
                return None

            if self.errorState:
                return None

            res = self.query(eventData, page)
            if res is None:
                return None

            facets = res.get('facets')
            if facets is None:
                return None

            count = facets.get('count')
            if count is None:
                return None

            # Never request pages beyond what the reported hit count implies.
            last_page = math.ceil(count / per_page)
            if last_page < pages:
                pages = last_page

            self.sf.info("Parsing page " + str(page) + " of " + str(pages))
            page += 1

            hits = res.get('hits')
            if hits is None:
                return None

            data = hits.get('hits')
            if data is None:
                return None

            for result in data:
                if result is None:
                    continue

                evt = SpiderFootEvent("RAW_RIR_DATA", str(result),
                                      self.__name__, event)
                self.notifyListeners(evt)

                content = result.get('content')
                if content is None:
                    continue

                snippet = content.get('snippet')
                if snippet is None:
                    continue

                # Strip the highlight markup once; both extractors below
                # work on the same cleaned text.
                text = snippet.replace('<mark>', '').replace('</mark>', '')

                links = self.sf.extractUrls(text)
                if links:
                    for link in links:
                        if link in self.results:
                            continue

                        host = self.sf.urlFQDN(link)

                        if not self.getTarget().matches(host,
                                                        includeChildren=True,
                                                        includeParents=True):
                            self.sf.debug("Skipped unrelated link: " + link)
                            continue

                        hosts.add(host)

                        self.sf.debug('Found a link: ' + link)
                        evt = SpiderFootEvent('LINKED_URL_INTERNAL', link,
                                              self.__name__, event)
                        self.notifyListeners(evt)
                        self.results[link] = True

                emails = self.sf.parseEmails(text)
                if emails:
                    for email in emails:
                        if email in self.results:
                            continue

                        mail_domain = email.lower().split('@')[1]
                        if not self.getTarget().matches(mail_domain,
                                                        includeChildren=True,
                                                        includeParents=True):
                            self.sf.debug("Skipped unrelated email address: " +
                                          email)
                            continue

                        self.sf.info("Found e-mail address: " + email)
                        # The local part decides between generic and
                        # regular addresses.
                        if email.split("@")[0] in self.opts[
                                '_genericusers'].split(","):
                            evttype = "EMAILADDR_GENERIC"
                        else:
                            evttype = "EMAILADDR"

                        evt = SpiderFootEvent(evttype, email, self.__name__,
                                              event)
                        self.notifyListeners(evt)
                        self.results[email] = True

        for host in hosts:
            if self.checkForStop():
                return None

            if self.errorState:
                return None

            if self.opts['dns_resolve'] and not self.sf.resolveHost(host):
                self.sf.debug("Host " + host + " could not be resolved")
                evt = SpiderFootEvent("INTERNET_NAME_UNRESOLVED", host,
                                      self.__name__, event)
                self.notifyListeners(evt)
                continue

            evt = SpiderFootEvent("INTERNET_NAME", host, self.__name__, event)
            self.notifyListeners(evt)
            if self.sf.isDomain(host, self.opts["_internettlds"]):
                evt = SpiderFootEvent("DOMAIN_NAME", host, self.__name__,
                                      event)
                self.notifyListeners(evt)
Exemple #6
0
    def handleEvent(self, event):
        """Check one or more IP addresses via ``self.queryIPAddress`` and report blacklisted ones.

        ``NETBLOCK_OWNER`` / ``NETBLOCK_MEMBER`` events are expanded to their
        individual addresses, subject to the ``netblocklookup`` /
        ``subnetlookup`` switches and the ``maxnetblock`` / ``maxsubnet``
        prefix-length limits. ``AFFILIATE_IPADDR`` events are only handled
        when ``checkaffiliates`` is set. Any other event data is queried
        as-is.

        For every address with blacklist records the raw response is emitted
        as ``RAW_RIR_DATA`` (preceded by an ``IP_ADDRESS`` event for netblock
        sources). Records last seen within ``age_limit_days`` are summarised
        into a ``MALICIOUS_IPADDR`` or ``MALICIOUS_AFFILIATE_IPADDR`` event.
        An address whose records are all too old or carry no valid date does
        not produce a malicious-address event.

        Args:
            event: incoming event; ``eventType``, ``module`` and ``data``
                are read from it.

        Returns:
            None in all cases.
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        if self.errorState:
            return None

        self.sf.debug(f"Received event, {eventName}, from {srcModuleName}")

        # Don't look up stuff twice
        if eventData in self.results:
            self.sf.debug(f"Skipping {eventData}, already checked.")
            return None

        self.results[eventData] = True

        if eventName == 'NETBLOCK_OWNER':
            if not self.opts['netblocklookup']:
                return None

            # A smaller prefix length means a larger network.
            prefixlen = IPNetwork(eventData).prefixlen
            if prefixlen < self.opts['maxnetblock']:
                self.sf.debug("Network size bigger than permitted: " +
                              str(prefixlen) + " > " +
                              str(self.opts['maxnetblock']))
                return None

        if eventName == 'NETBLOCK_MEMBER':
            if not self.opts['subnetlookup']:
                return None

            prefixlen = IPNetwork(eventData).prefixlen
            if prefixlen < self.opts['maxsubnet']:
                self.sf.debug("Network size bigger than permitted: " +
                              str(prefixlen) + " > " +
                              str(self.opts['maxsubnet']))
                return None

        fromNetblock = eventName.startswith("NETBLOCK_")

        qrylist = list()
        if fromNetblock:
            # Expand the network; each address is marked as handled so a
            # later event for the same address is skipped.
            for ipaddr in IPNetwork(eventData):
                qrylist.append(str(ipaddr))
                self.results[str(ipaddr)] = True
        else:
            # If user has enabled affiliate checking
            if eventName == "AFFILIATE_IPADDR" and not self.opts[
                    'checkaffiliates']:
                return None
            qrylist.append(eventData)

        for addr in qrylist:

            if self.checkForStop():
                return None

            data = self.queryIPAddress(addr)

            # A failed query ends the whole run, not just this address.
            if data is None:
                break

            maliciousIP = data.get('ip_addr')

            if maliciousIP is None:
                continue

            if addr != maliciousIP:
                self.sf.error(
                    "Reported address doesn't match requested, skipping",
                    False)
                continue

            blacklistedRecords = data.get('blacklist')

            if blacklistedRecords is None or len(blacklistedRecords) == 0:
                self.sf.debug("No blacklist information found for IP")
                continue

            # Data is reported about the IP Address. For netblock sources an
            # IP_ADDRESS event is emitted first and becomes the parent of
            # everything reported for this address.
            if fromNetblock:
                parentEvt = SpiderFootEvent("IP_ADDRESS", addr, self.__name__,
                                            event)
                self.notifyListeners(parentEvt)
            else:
                parentEvt = event

            evt = SpiderFootEvent("RAW_RIR_DATA", str(data), self.__name__,
                                  parentEvt)
            self.notifyListeners(evt)

            today = datetime.now()
            ageLimit = int(self.opts["age_limit_days"])

            # One description line per record that is recent enough.
            descriptions = list()
            for blacklistedRecord in blacklistedRecords:
                lastSeen = blacklistedRecord.get('last_seen')
                if lastSeen is None:
                    continue

                try:
                    lastSeenDate = datetime.strptime(str(lastSeen),
                                                     "%Y-%m-%d %H:%M:%S")
                except (TypeError, ValueError):
                    self.sf.error("Invalid date in JSON response, skipping",
                                  False)
                    continue

                if (today - lastSeenDate).days > ageLimit:
                    self.sf.debug(
                        "Record found is older than age limit, skipping")
                    continue

                descriptions.append(" - DESCRIPTION : " + str(
                    blacklistedRecord.get("description")) + "\n")

            # Without at least one recent record there is nothing to
            # report; do not flag the address on the header line alone.
            if not descriptions:
                self.sf.debug(
                    "No blacklist records within the age limit for IP")
                continue

            maliciousIPDesc = f"Maltiverse [{maliciousIP}]\n" + "".join(
                descriptions)

            # The same description is only reported once.
            maliciousIPDescHash = self.sf.hashstring(maliciousIPDesc)

            if maliciousIPDescHash in self.results:
                continue

            self.results[maliciousIPDescHash] = True

            if eventName.startswith("AFFILIATE_"):
                evtType = "MALICIOUS_AFFILIATE_IPADDR"
            else:
                evtType = "MALICIOUS_IPADDR"

            evt = SpiderFootEvent(evtType, maliciousIPDesc, self.__name__,
                                  parentEvt)
            self.notifyListeners(evt)

        return None
Exemple #7
0
    def handleEvent(self, event):
        """Look the event data up in SHODAN and emit events for what is found.

        ``NETBLOCK_*`` events are expanded to their individual addresses
        (``NETBLOCK_OWNER`` only if ``netblocklookup`` is enabled and the
        prefix length is not below ``maxnetblock``); any other event data is
        queried as-is. For each record returned by ``self.query`` this emits
        ``OPERATING_SYSTEM`` and ``DEVICE_TYPE`` when present, and
        ``TCP_PORT_OPEN`` / ``TCP_PORT_OPEN_BANNER`` for each entry in
        ``rec['data']`` that carries a port / banner.

        Args:
            event: incoming event; ``eventType``, ``module`` and ``data``
                are read from it.

        Returns:
            None in all cases.
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        if self.errorState:
            return None

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)

        if self.opts['apikey'] == "":
            self.sf.error("You enabled sfp_shodan but did not set an API key!",
                          False)
            self.errorState = True
            return None

        # Don't look up stuff twice
        if eventData in self.results:
            self.sf.debug("Skipping " + eventData + " as already mapped.")
            return None
        self.results[eventData] = True

        if eventName == 'NETBLOCK_OWNER':
            # With netblock lookups disabled the network must not be
            # expanded and queried at all.
            if not self.opts['netblocklookup']:
                return None

            # A smaller prefix length means a larger network.
            prefixlen = IPNetwork(eventData).prefixlen
            if prefixlen < self.opts['maxnetblock']:
                self.sf.debug("Network size bigger than permitted: " +
                              str(prefixlen) + " > " +
                              str(self.opts['maxnetblock']))
                return None

        qrylist = list()
        if eventName.startswith("NETBLOCK_"):
            # Expand the network; each address is marked as handled so a
            # later event for the same address is skipped.
            for ipaddr in IPNetwork(eventData):
                qrylist.append(str(ipaddr))
                self.results[str(ipaddr)] = True
        else:
            qrylist.append(eventData)

        for addr in qrylist:
            rec = self.query(addr)
            if rec is None:
                continue

            if self.checkForStop():
                return None

            if rec.get('os') is not None:
                # Notify other modules of what you've found
                evt = SpiderFootEvent("OPERATING_SYSTEM",
                                      rec.get('os') + " (" + addr + ")",
                                      self.__name__, event)
                self.notifyListeners(evt)

            if rec.get('devtype') is not None:
                # Notify other modules of what you've found
                evt = SpiderFootEvent("DEVICE_TYPE",
                                      rec.get('devtype') + " (" + addr + ")",
                                      self.__name__, event)
                self.notifyListeners(evt)

            if 'data' in rec:
                self.sf.info("Found SHODAN data for " + eventData)
                for r in rec['data']:
                    # Keep the raw value: converting to str here would turn
                    # a missing port into the string "None" and defeat the
                    # None check below.
                    port = r.get('port')
                    banner = r.get('banner')

                    if port is not None:
                        # Notify other modules of what you've found
                        cp = addr + ":" + str(port)
                        evt = SpiderFootEvent("TCP_PORT_OPEN", cp,
                                              self.__name__, event)
                        self.notifyListeners(evt)

                    if banner is not None:
                        # Notify other modules of what you've found
                        evt = SpiderFootEvent("TCP_PORT_OPEN_BANNER", banner,
                                              self.__name__, event)
                        self.notifyListeners(evt)

        return None
Exemple #8
0
    def handleEvent(self, event):
        """Query points of contact for a domain or a person's name.

        ``DOMAIN_NAME`` events trigger a ``"domain"`` query and
        ``HUMAN_NAME`` events a ``"name"`` query via ``self.query``. For each
        point-of-contact reference in the response (``ret['pocs']['pocRef']``)
        a follow-up ``"contact"`` query is made. For domains, each contact's
        name is additionally emitted as ``RAW_RIR_DATA`` with a
        "Possible full name: " prefix.

        The incoming event is stored in ``self.currentEventSrc``.

        Args:
            event: incoming event; ``eventType``, ``module`` and ``data``
                are read from it.

        Returns:
            None in all cases.
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data
        self.currentEventSrc = event

        self.sf.debug("Received event, %s, from %s" % (eventName, srcModuleName))

        # Don't look up stuff twice
        if eventData in self.results:
            self.sf.debug("Skipping " + eventData + " as already mapped.")
            return None
        self.results[eventData] = True

        if eventName == "DOMAIN_NAME":
            ret = self.query("domain", eventData)
        elif eventName == "HUMAN_NAME":
            ret = self.query("name", eventData)
        else:
            return None

        if not ret:
            return None

        if "pocs" not in ret or "pocRef" not in ret['pocs']:
            return None

        # Might be a list or a dictionary; normalise to a list.
        ref = ret['pocs']['pocRef']
        if isinstance(ref, dict):
            ref = [ref]

        for p in ref:
            if eventName == "DOMAIN_NAME":
                name = p['@name']
                if "," in name:
                    # Turn "Last, First" into "First Last". Splitting on the
                    # bare comma also copes with a missing space after it
                    # and with a trailing comma.
                    parts = [part.strip() for part in name.split(",", 1)]
                    name = " ".join(part for part in reversed(parts) if part)

                # A bit of a hack. The reason we do this is because
                # the names are separated in the content and sfp_names
                # won't recognise it. So we submit this and see if it
                # really is considered a name.
                evt = SpiderFootEvent("RAW_RIR_DATA", "Possible full name: " + name,
                                      self.__name__, self.currentEventSrc)
                self.notifyListeners(evt)

            # We just want the raw data so we can get potential
            # e-mail addresses.
            self.query("contact", p['$'])
Exemple #9
0
    def handleEvent(self, event):
        """Look for human names in the event data and emit HUMAN_NAME events.

        EMAILADDR events (when the 'emailtoname' option is set) whose local
        part contains a dot, e.g. "john.smith@...", are converted directly
        into a name. All other data is scanned for pairs of capitalised words,
        which are scored using the dictionary (self.d) and the list of known
        first names (self.n); pairs scoring above the 'algolimit' option are
        reported.

        Args:
            event: the received event; its eventType, module, data,
                actualSource and moduleDataSource attributes are read.

        Returns:
            None
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)

        def notifyName(name):
            # Emit a HUMAN_NAME event, carrying over the data source of the
            # event that triggered it.
            evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
            if event.moduleDataSource:
                evt.moduleDataSource = event.moduleDataSource
            else:
                evt.moduleDataSource = "Unknown"
            self.notifyListeners(evt)

        # If the source event is web content, check if the source URL was javascript
        # or CSS, in which case optionally ignore it.
        if eventName == "TARGET_WEB_CONTENT":
            url = event.actualSource
            if url is not None:
                if self.opts['filterjscss'] and (".js" in url
                                                 or ".css" in url):
                    self.sf.debug("Ignoring web content from CSS/JS.")
                    return None

        if eventName == "EMAILADDR" and self.opts['emailtoname']:
            localPart = eventData.split("@")[0]
            if "." in localPart:
                name = str(" ".join(
                    part.capitalize() for part in localPart.split(".")))

                # Names don't have numbers. This must search for an actual
                # digit: a pattern such as "[0-9]*" also matches the empty
                # string and would therefore reject every name.
                if re.search(r"[0-9]", name):
                    return None

                # Notify other modules of what you've found
                notifyName(name)
                return None

        # Stage 1: Find things that look (very vaguely) like names
        # NOTE(review): both character classes contain U+FFFD replacement
        # characters, presumably accented letters lost to an encoding error.
        # They are kept as-is; TODO restore the intended characters.
        rx = re.compile(
            r"([A-Z][a-z�������������]+)\s+.?.?\s?([A-Z][�������������a-zA-Z\'\-]+)"
        )
        for r in re.findall(rx, eventData):
            # Start off each match as 0 points.
            p = 0

            # Shouldn't encounter "Firstname's Secondname"
            first = r[0].lower()
            if "'" in first[-2:]:
                continue

            # Strip off trailing ' or 's
            secondOrig = r[1].replace("'s", "").rstrip("'")
            second = r[1].lower().replace("'s", "").rstrip("'")

            # Look each word up once; the results are used several times below.
            firstInDict = first in self.d
            secondInDict = second in self.d

            # If both words are not in the dictionary, add 75 points.
            if not firstInDict and not secondInDict:
                self.sf.debug(
                    "Both first and second names are not in the dictionary, so high chance of name: ("
                    + first + ":" + second + ").")
                p += 75
            else:
                self.sf.debug(first + " was found or " + second +
                              " was found in dictionary.")

            # If the first word is a known popular first name, award 50 points.
            if first in self.n:
                p += 50

            # If either word is 2 characters, subtract 50 points.
            if len(first) == 2 or len(second) == 2:
                p -= 50

            if firstInDict and not secondInDict:
                # Only the first word is in the dictionary: subtract 20 points.
                p -= 20
            elif secondInDict and not firstInDict:
                # Only the second word is in the dictionary: subtract 40 points.
                p -= 40

            name = r[0] + " " + secondOrig

            self.sf.debug("Name of " + name + " has score: " + str(p))
            if p > self.opts['algolimit']:
                # Notify other modules of what you've found
                notifyName(name)
Exemple #10
0
    def handleEvent(self, event):
        """Query VirusTotal for the event data and report malicious entries.

        IP addresses and host names are looked up directly; netblocks are
        expanded into their individual addresses first (subject to the
        'netblocklookup'/'subnetlookup' and 'maxnetblock'/'maxsubnet'
        options). For every address whose query result contains
        'detected_urls', a MALICIOUS_* event matching the source event type is
        emitted.

        Args:
            event: the received event; its eventType, module and data
                attributes are read.

        Returns:
            None
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)

        if self.opts['apikey'] == "":
            self.sf.error(
                "You enabled sfp_virustotal but did not set an API key!",
                False)
            return None

        # Don't look up stuff twice
        if eventData in self.results:
            self.sf.debug("Skipping " + eventData + " as already mapped.")
            return None
        self.results[eventData] = True

        if eventName.startswith(
                "AFFILIATE") and not self.opts['checkaffiliates']:
            return None

        if eventName == 'CO_HOSTED_SITE' and not self.opts['checkcohosts']:
            return None

        # Netblocks are only expanded when the matching lookup option is
        # enabled; otherwise disabling the option would also bypass the size
        # limit and every address in the block would be queried.
        if eventName == 'NETBLOCK_OWNER':
            if not self.opts['netblocklookup']:
                return None
            if IPNetwork(eventData).prefixlen < self.opts['maxnetblock']:
                self.sf.debug("Network size bigger than permitted: " +
                              str(IPNetwork(eventData).prefixlen) + " > " +
                              str(self.opts['maxnetblock']))
                return None

        if eventName == 'NETBLOCK_MEMBER':
            if not self.opts['subnetlookup']:
                return None
            if IPNetwork(eventData).prefixlen < self.opts['maxsubnet']:
                self.sf.debug("Network size bigger than permitted: " +
                              str(IPNetwork(eventData).prefixlen) + " > " +
                              str(self.opts['maxsubnet']))
                return None

        # Work out which event type to raise and which VirusTotal page to link
        # to. This is decided up front so that an unexpected source event type
        # is skipped instead of failing on an unbound variable later.
        if eventName == "IP_ADDRESS" or eventName.startswith("NETBLOCK_"):
            evtType, infotype = "MALICIOUS_IPADDR", "ip-address"
        elif eventName == "AFFILIATE_IPADDR":
            evtType, infotype = "MALICIOUS_AFFILIATE_IPADDR", "ip-address"
        elif eventName == "INTERNET_NAME":
            evtType, infotype = "MALICIOUS_INTERNET_NAME", "domain"
        elif eventName == "AFFILIATE_INTERNET_NAME":
            evtType, infotype = "MALICIOUS_AFFILIATE_INTERNET_NAME", "domain"
        elif eventName == "CO_HOSTED_SITE":
            evtType, infotype = "MALICIOUS_COHOST", "domain"
        else:
            self.sf.debug("Unexpected event type " + eventName + ", skipping.")
            return None

        qrylist = list()
        if eventName.startswith("NETBLOCK_"):
            for ipaddr in IPNetwork(eventData):
                qrylist.append(str(ipaddr))
                # Mark each member address as seen so it is not queried again
                # if it later arrives as its own event.
                self.results[str(ipaddr)] = True
        else:
            qrylist.append(eventData)

        for addr in qrylist:
            if self.checkForStop():
                return None

            info = self.query(addr)
            if info is None:
                continue
            if 'detected_urls' not in info:
                continue

            self.sf.info("Found VirusTotal URL data for " + addr)
            infourl = "<SFURL>https://www.virustotal.com/en/" + infotype + "/" + \
                addr + "/information/</SFURL>"

            # Notify other modules of what you've found
            e = SpiderFootEvent(evtType, "VirusTotal [" + addr + "]\n" +
                                infourl, self.__name__, event)
            self.notifyListeners(e)
    def handleEvent(self, event):
        """Search for social media profiles related to the event data.

        For every entry in the module-level ``sites`` table, the entry's
        search template is filled in with the event data and run through the
        search engine selected by the 'method' option (google, yahoo or bing).
        Each result page is scanned with the entry's regular expression and
        every distinct match on that page is reported as a SOCIAL_MEDIA event.
        When the 'tighten' option is set, the matched URL is fetched first and
        only reported if its content mentions one of the target's keywords.
        The content of each result page is also emitted as
        SEARCH_ENGINE_WEB_CONTENT.

        Args:
            event: the received event; its eventType, module and data
                attributes are read.

        Returns:
            None
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data
        self.currentEventSrc = event

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)

        # Don't look up stuff twice
        if eventData in self.results:
            self.sf.debug("Skipping " + eventData + " as already mapped.")
            return None
        self.results.append(eventData)

        if self.keywords is None:
            self.keywords = self.sf.domainKeywords(self.getTarget().getNames(),
                                                   self.opts['_internettlds'])

        method = self.opts['method'].lower()

        for site in sites:
            # NOTE(review): `unicode` only exists on Python 2; kept because
            # the Python version this module targets is not visible here.
            s = unicode(sites[site][0]).format(eventData)
            searchStr = s.replace(" ", "%20")
            searchDom = sites[site][1]

            searchOpts = dict(limit=self.opts['pages'],
                              useragent=self.opts['_useragent'],
                              timeout=self.opts['_fetchtimeout'])

            # Start from None on every iteration so that an unsupported
            # 'method' value is reported below, rather than raising NameError
            # or silently reusing the previous site's results.
            results = None
            if method == "google":
                results = self.sf.googleIterate(searchStr, searchOpts)
            elif method == "yahoo":
                results = self.sf.yahooIterate(searchStr, searchOpts)
            elif method == "bing":
                results = self.sf.bingIterate(searchStr, searchOpts)

            if results is None:
                self.sf.info("No data returned from " + self.opts['method'] +
                             ".")
                return None

            if self.checkForStop():
                return None

            # Random pause between searches.
            pauseSecs = random.randint(4, 15)
            self.sf.debug("Pausing for " + str(pauseSecs))
            time.sleep(pauseSecs)

            for key in results:
                instances = list()
                # Yahoo requires some additional parsing
                if method == "yahoo":
                    res = re.sub(r"RU=(.[^\/]+)\/RK=", self.yahooCleaner,
                                 results[key], 0)
                else:
                    res = results[key]

                for match in re.findall(searchDom, res, re.IGNORECASE):
                    # Report each profile only once per result page.
                    if match in instances:
                        continue
                    instances.append(match)

                    if self.checkForStop():
                        return None

                    # Fetch the profile page if we are checking
                    # for a firm relationship.
                    if self.opts['tighten']:
                        pres = self.sf.fetchUrl(
                            match,
                            timeout=self.opts['_fetchtimeout'],
                            useragent=self.opts['_useragent'])

                        if pres['content'] is None:
                            continue

                        # The keyword is escaped so it is matched literally,
                        # and the scan stops at the first keyword found.
                        found = any(
                            re.search(
                                r"[^a-zA-Z\-\_]" + re.escape(kw) +
                                r"[^a-zA-Z\-\_]", pres['content'],
                                re.IGNORECASE) for kw in self.keywords)
                        if not found:
                            continue

                    self.sf.info("Social Media Profile found at " + site +
                                 ": " + match)
                    evt = SpiderFootEvent("SOCIAL_MEDIA",
                                          site + ": " + match,
                                          self.__name__, event)
                    self.notifyListeners(evt)

                # Submit the search engine results for analysis
                evt = SpiderFootEvent("SEARCH_ENGINE_WEB_CONTENT", res,
                                      self.__name__, event)
                self.notifyListeners(evt)
Exemple #12
0
    def handleEvent(self, event):
        """Query for keys related to the event data and report names/e-mails.

        The result of self.query() is treated as a mapping from a key
        identifier to a sequence whose first item is a name and whose second
        item is an e-mail address. Entries without an e-mail address are
        dropped. For DOMAIN_NAME/INTERNET_NAME events only addresses whose
        domain matches the target are kept; for EMAILADDR events only the
        address equal to the event data (case-insensitively) is kept. Names
        are emitted as RAW_RIR_DATA, addresses as EMAILADDR, and every key is
        passed to self.retrieveKey() when the 'fetch_keys' option is set.

        Args:
            event: the received event; its eventType, module and data
                attributes are read.

        Returns:
            None
        """
        evType = event.eventType
        origin = event.module
        target = event.data

        self.sf.debug("Received event, " + evType + ", from " + origin)

        # Never act on events this module raised itself.
        if origin == 'sfp_peegeepee':
            self.sf.debug("Ignoring " + evType + ", from self.")
            return None

        if target in self.results:
            self.sf.debug("Skipping " + target + ", already checked.")
            return None

        self.results[target] = True

        keys = self.query(target)
        if not keys:
            self.sf.debug('No results found for ' + target)
            return None

        filterByDomain = evType in ('DOMAIN_NAME', 'INTERNET_NAME')
        filterByAddress = evType == 'EMAILADDR'

        foundNames = []
        foundEmails = []

        for key in keys:
            name = keys.get(key)[0]
            email = keys.get(key)[1]

            if not email:
                continue

            # Keep only e-mail addresses on a domain belonging to the target.
            if filterByDomain:
                mailDom = email.lower().split('@')[1]
                if not self.getTarget().matches(mailDom):
                    continue

            # Keep only the entry for the e-mail address we were asked about.
            if filterByAddress and email.lower() != target.lower():
                continue

            foundEmails.append(email)
            foundNames.append(name)

        # A bit of a hack: the name is submitted as raw data so that
        # sfp_names can decide whether it is considered to be a name.
        for name in set(foundNames):
            self.notifyListeners(
                SpiderFootEvent('RAW_RIR_DATA', 'Possible full name: ' + name,
                                self.__name__, event))

        for email in set(foundEmails):
            self.notifyListeners(
                SpiderFootEvent('EMAILADDR', email, self.__name__, event))

        if self.opts['fetch_keys']:
            for key in keys:
                self.retrieveKey(key, event)

        return None
Exemple #13
0
    def handleEvent(self, event):
        """Query for scan results about the event data and report findings.

        The 'results' list returned by self.query() is emitted as RAW_RIR_DATA
        and then walked entry by entry. Entries whose page domain matches the
        target (parents included) contribute domains, ASNs, locations, server
        banners and internal URLs, each of which is emitted once as
        LINKED_URL_INTERNAL, GEOINFO, INTERNET_NAME(_UNRESOLVED), DOMAIN_NAME,
        BGP_AS_MEMBER and WEBSERVER_BANNER events respectively.

        Args:
            event: the received event; its eventType, module and data
                attributes are read.

        Returns:
            None
        """
        evType = event.eventType
        origin = event.module
        target = event.data

        if self.errorState:
            return None

        self.sf.debug("Received event, %s, from %s" % (evType, origin))

        # Don't look up stuff twice
        if target in self.results:
            self.sf.debug("Skipping " + target + " as already mapped.")
            return None

        self.results[target] = True

        data = self.query(target)
        if data is None:
            return None

        results = data.get('results')
        if not results:
            return None

        def emit(kind, payload):
            # Build an event of the given type linked to the source event and
            # pass it on to the listening modules.
            self.notifyListeners(
                SpiderFootEvent(kind, payload, self.__name__, event))

        emit('RAW_RIR_DATA', str(results))

        urls, asns, domains, locations, servers = [], [], [], [], []

        for res in results:
            page = res.get('page')
            if not page:
                continue

            domain = page.get('domain')
            if not domain:
                continue

            # Only results for hosts related to the target are of interest.
            if not self.getTarget().matches(domain, includeParents=True):
                continue

            if domain.lower() != target.lower():
                domains.append(domain)

            asn = page.get('asn')
            if asn:
                asns.append(asn.replace('AS', ''))

            place = [page.get('city'), page.get('country')]
            location = ', '.join(part for part in place if part)
            if location:
                locations.append(location)

            server = page.get('server')
            if server:
                servers.append(server)

            task = res.get('task')
            if task:
                url = task.get('url')
                if self.getTarget().matches(self.sf.urlFQDN(url),
                                            includeParents=True):
                    urls.append(url)

        for url in set(urls):
            emit('LINKED_URL_INTERNAL', url)

        for location in set(locations):
            emit('GEOINFO', location)

        uniqueDomains = set(domains)

        if self.opts['verify'] and domains:
            self.sf.info("Resolving " + str(len(uniqueDomains)) +
                         " domains ...")

        for domain in uniqueDomains:
            unresolved = self.opts['verify'] and not self.sf.resolveHost(domain)
            emit('INTERNET_NAME_UNRESOLVED' if unresolved else 'INTERNET_NAME',
                 domain)

            if self.sf.isDomain(domain, self.opts['_internettlds']):
                emit('DOMAIN_NAME', domain)

        for asn in set(asns):
            emit('BGP_AS_MEMBER', asn)

        for server in set(servers):
            emit('WEBSERVER_BANNER', server)

        return None
    def handleEvent(self, event):
        """Query for hosts/URLs related to the event data and report them.

        The list returned by self.query() is emitted as RAW_RIR_DATA. Each
        entry of type 'hostname' or 'url' that matches the target (children
        and parents included) contributes a host name; URLs are additionally
        emitted as LINKED_URL_INTERNAL. Tags of the form "port:<n>" produce
        TCP_PORT_OPEN events. Finally every distinct host is emitted as
        INTERNET_NAME, plus DOMAIN_NAME when self.sf.isDomain() says so.

        Args:
            event: the received event; its eventType, module and data
                attributes are read.

        Returns:
            None
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        if self.errorState:
            return None

        self.sf.debug(f"Received event, {eventName}, from {srcModuleName}")

        if srcModuleName == 'sfp_fringeproject':
            self.sf.debug("Ignoring " + eventData + ", from self.")
            return None

        if eventData in self.results:
            self.sf.debug(f"Skipping {eventData}, already checked.")
            return None

        self.results[eventData] = True

        data = self.query(eventData)

        if not data:
            self.sf.info("No results found for " + eventData)
            return None

        e = SpiderFootEvent('RAW_RIR_DATA', str(data), self.__name__, event)
        self.notifyListeners(e)

        # Compiled once; it is applied to every tag of every result.
        portTag = re.compile(r'^port:([0-9]+)')

        hosts = list()

        for result in data:
            data_type = result.get('type')

            if data_type not in ['url', 'hostname']:
                # Formatted rather than concatenated: data_type is None when
                # the result has no 'type' key, and str + None raises.
                self.sf.debug(f"Unknown result data type: {data_type}")
                continue

            value = result.get('value')

            if not value:
                continue

            if data_type == 'hostname':
                if not self.getTarget().matches(
                        value, includeChildren=True, includeParents=True):
                    continue

                hosts.append(value)

            if data_type == 'url':
                host = self.sf.urlFQDN(value.lower())

                if not self.getTarget().matches(
                        host, includeChildren=True, includeParents=True):
                    continue

                hosts.append(host)

                evt = SpiderFootEvent('LINKED_URL_INTERNAL', value,
                                      self.__name__, event)
                self.notifyListeners(evt)

            tags = result.get('tags')

            if not tags:
                continue

            for tag in tags:
                # Only string tags can be parsed; anything else is skipped
                # explicitly instead of relying on a catch-all handler.
                if not isinstance(tag, str):
                    self.sf.debug("Didn't get sane data from FringeProject.")
                    continue

                port = portTag.match(tag)
                if port is None:
                    continue

                # NOTE(review): for 'url' results `value` is the full URL, so
                # the event data becomes "<url>:<port>" - confirm whether the
                # host name was intended here.
                evt = SpiderFootEvent('TCP_PORT_OPEN',
                                      value + ':' + port.group(1),
                                      self.__name__, event)
                self.notifyListeners(evt)

        for host in set(hosts):
            evt = SpiderFootEvent('INTERNET_NAME', host, self.__name__, event)
            self.notifyListeners(evt)
            if self.sf.isDomain(host, self.opts['_internettlds']):
                evt = SpiderFootEvent('DOMAIN_NAME', host, self.__name__,
                                      event)
                self.notifyListeners(evt)
Exemple #15
0
    def handleEvent(self, event):
        """Look the event data up on DuckDuckGo and report what it says.

        The JSON answer is emitted as SEARCH_ENGINE_WEB_CONTENT. Its
        'AbstractText' is emitted as DESCRIPTION_ABSTRACT and the 'Text' of
        each entry in 'RelatedTopics' as DESCRIPTION_CATEGORY; both event
        types are prefixed with "AFFILIATE_" when the source event type
        contains "AFFILIATE".

        Args:
            event: the received event; its eventType and data attributes are
                read.

        Returns:
            None
        """
        eventName = event.eventType
        eventData = event.data

        # For affiliates, optionally look up the domain rather than the host.
        if self.opts['affiliatedomains'] and "AFFILIATE_" in eventName:
            eventData = self.sf.hostDomain(eventData,
                                           self.opts['_internettlds'])
            if not eventData:
                return None

        if eventData in self.results:
            self.sf.debug(f"Skipping {eventData}, already checked.")
            return None

        self.results[eventData] = True

        url = "https://api.duckduckgo.com/?q=" + eventData + "&format=json&pretty=1"
        res = self.sf.fetchUrl(url,
                               timeout=self.opts['_fetchtimeout'],
                               useragent="SpiderFoot")

        if res['content'] is None:
            self.sf.error(f"Unable to fetch {url}", False)
            return None

        try:
            ret = json.loads(res['content'])
        except Exception as e:
            # Exception rather than BaseException, so KeyboardInterrupt and
            # SystemExit are not swallowed.
            self.sf.error(
                f"Error processing JSON response from DuckDuckGo: {e}", False)
            return None

        # A response that is not an object, or that lacks a 'Heading', is
        # treated as "no information" rather than raising.
        if not isinstance(ret, dict) or not ret.get('Heading'):
            self.sf.debug(f"No DuckDuckGo information for {eventData}")
            return None

        # Submit the DuckDuckGo results for analysis
        evt = SpiderFootEvent("SEARCH_ENGINE_WEB_CONTENT", res['content'],
                              self.__name__, event)
        self.notifyListeners(evt)

        # Findings about affiliates are reported under AFFILIATE_* types.
        prefix = "AFFILIATE_" if "AFFILIATE" in eventName else ""

        abstract_text = ret.get('AbstractText')
        if abstract_text:
            evt = SpiderFootEvent(prefix + "DESCRIPTION_ABSTRACT",
                                  str(abstract_text), self.__name__, event)
            self.notifyListeners(evt)

        related_topics = ret.get('RelatedTopics')
        if related_topics:
            event_type = prefix + "DESCRIPTION_CATEGORY"

            for topic in related_topics:
                category = topic.get('Text') if isinstance(topic, dict) else None

                if not category:
                    self.sf.debug("No category text found from DuckDuckGo.")
                    continue

                evt = SpiderFootEvent(event_type, category, self.__name__,
                                      event)
                self.notifyListeners(evt)
Exemple #16
0
    def handleEvent(self, event):
        """Find company names in the event data and report them.

        The data is first searched for plain company suffixes ("Ltd", "Inc",
        ...). A small window of text around every hit is then run through the
        full regular expressions, and each distinct company name found is
        emitted as COMPANY_NAME, or AFFILIATE_COMPANY_NAME when the source
        event type contains "AFFILIATE_".

        Args:
            event: the received event; its eventType, module, data,
                sourceEvent and moduleDataSource attributes are read.

        Returns:
            None
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        # Various ways to identify companies in text
        # Support up to three word company names with each starting with
        # a capital letter, allowing for hyphens brackets and numbers within.
        pattern_prefix = "(?=[,;:\'\">\(= ]|^)\s?([A-Z0-9\(\)][A-Za-z0-9\-&,\.][^ \"\';:><]*)?\s?([A-Z0-9\(\)][A-Za-z0-9\-&,\.]?[^ \"\';:><]*|[Aa]nd)?\s?([A-Z0-9\(\)][A-Za-z0-9\-&,\.]?[^ \"\';:><]*)?\s+"
        # Every entry needs its own comma: adjacent string literals are
        # silently concatenated, which would merge two suffixes into one
        # pattern that matches neither.
        pattern_match_re = [
            'LLC', 'L\.L\.C\.?', 'AG', 'A\.G\.?', 'GmbH', 'Pty\.?\s+Ltd\.?',
            'Ltd\.?', 'Pte\.?', 'Inc\.?', 'INC\.?', 'Incorporated', 'Foundation',
            'Corp\.?', 'Corporation', 'SA', 'S\.A\.?', 'SIA', 'BV', 'B\.V\.?',
            'NV', 'N\.V\.?', 'PLC', 'Limited', 'Pvt\.?\s+Ltd\.?', 'SARL' ]
        pattern_match = [
            'LLC', 'L.L.C', 'AG', 'A.G', 'GmbH', 'Pty',
            'Ltd', 'Pte', 'Inc', 'INC', 'Foundation',
            'Corp', 'SA', 'S.A', 'SIA', 'BV', 'B.V',
            'NV', 'N.V', 'PLC', 'Limited', 'Pvt.', 'SARL' ]

        pattern_suffix = "(?=[ \.,:<\)\'\"]|[$\n\r])"

        # Filter out anything from the company name which matches the below
        filterpatterns = [
            "Copyright",
            "\d{4}" # To catch years
        ]

        # Don't re-parse company names
        if eventName in [ "COMPANY_NAME", "AFFILIATE_COMPANY_NAME" ]:
            return None

        if eventName == "TARGET_WEB_CONTENT":
            url = event.sourceEvent.data
            if self.opts['filterjscss'] and (".js" in url or ".css" in url):
                self.sf.debug("Ignoring web content from CSS/JS.")
                return None

        self.sf.debug("Received event, " + eventName + ", from " + srcModuleName + ": " + str(len(eventData)) + " bytes.")

        # NOTE(review): `unicode` only exists on Python 2; kept because the
        # Python version this module targets is not visible here.
        if not isinstance(eventData, (str, unicode)):
            if not isinstance(eventData, (list, dict)):
                self.sf.debug("Unhandled type to find company names: " +
                              str(type(eventData)))
                return None
            try:
                eventData = str(eventData)
            except Exception as e:
                self.sf.debug("Unable to convert list/dict to string: " + str(e))
                return None

        # Strip out everything before the O=
        if eventName == "SSL_CERTIFICATE_ISSUED":
            try:
                eventData = eventData.split("O=")[1]
            except IndexError:
                self.sf.debug("Couldn't strip out O=, proceeding anyway...")

        # Find chunks of text containing what might be a company name first.
        # This is to avoid running very expensive regexps on large chunks of
        # data.
        chunks = list()
        for pat in pattern_match:
            m = eventData.find(pat)
            # find() returns -1 when there are no more hits; index 0 is a
            # valid hit.
            while m != -1:
                # Up to 50 characters before the hit and 10 from its start.
                # Slicing clamps at the end of the data, so the final
                # character is kept.
                chunks.append(eventData[max(0, m - 50):m + 10])
                m = eventData.find(pat, m + len(pat))

        if not chunks:
            return None

        # Compile each expression once rather than once per chunk.
        company_res = [
            re.compile(pattern_prefix + "(" + pat + ")" + pattern_suffix,
                       re.MULTILINE | re.DOTALL)
            for pat in pattern_match_re
        ]

        myres = list()
        for chunk in chunks:
            for company_re in company_res:
                for match in company_re.findall(chunk):
                    # The suffix group always matches, so at least one of the
                    # name groups must be non-empty as well.
                    if sum(1 for m in match if m) <= 1:
                        continue

                    # Drop the parts that look like a copyright notice or a
                    # year.
                    kept = [
                        m for m in match
                        if not any(re.match(f, m) for f in filterpatterns)
                    ]
                    fullcompany = re.sub("\s+", " ", " ".join(kept).strip())

                    self.sf.info("Found company name: " + fullcompany)
                    if fullcompany in myres:
                        self.sf.debug("Already found from this source.")
                        continue
                    myres.append(fullcompany)

                    if "AFFILIATE_" in eventName:
                        etype = "AFFILIATE_COMPANY_NAME"
                    else:
                        etype = "COMPANY_NAME"

                    evt = SpiderFootEvent(etype, fullcompany, self.__name__, event)
                    if event.moduleDataSource:
                        evt.moduleDataSource = event.moduleDataSource
                    else:
                        evt.moduleDataSource = "Unknown"
                    self.notifyListeners(evt)
Exemple #17
0
    def __startScan(self):
        """Start running a scan.

        Loads and configures every module named in the module list, registers
        the modules as listeners of one another, and then kicks the scan off
        by sending a ROOT event followed by an event describing the target
        through a pseudo module. The scan status is moved through STARTING and
        RUNNING to FINISHED or ABORTED; any exception raised along the way is
        logged and the status is set to ERROR-FAILED instead. The database
        handle is closed before returning.
        """

        aborted = False

        # presumably the arguments are (status, start time, end time), with
        # the times in milliseconds - verify against __setStatus
        self.__setStatus("STARTING", time.time() * 1000, None)
        self.__sf.status(f"Scan [{self.__scanId}] initiated.")

        try:
            # moduleList = list of modules the user wants to run
            for modName in self.__moduleList:
                if modName == '':
                    continue

                try:
                    module = __import__('modules.' + modName, globals(), locals(), [modName])
                except ImportError:
                    self.__sf.error("Failed to load module: " + modName, False)
                    continue

                # The plugin class carries the same name as its module.
                mod = getattr(module, modName)()
                mod.__name__ = modName

                # Module may have been renamed or removed
                if modName not in self.__config['__modules__']:
                    continue

                # Set up the module
                # Configuration is a combined global config with module-specific options
                self.__modconfig[modName] = deepcopy(self.__config['__modules__'][modName]['opts'])
                for opt in list(self.__config.keys()):
                    self.__modconfig[modName][opt] = deepcopy(self.__config[opt])

                mod.clearListeners()  # clear any listener relationships from the past
                mod.setup(self.__sf, self.__modconfig[modName])
                mod.setDbh(self.__dbh)
                mod.setScanId(self.__scanId)

                # Give modules a chance to 'enrich' the original target with
                # aliases of that target.
                newTarget = mod.enrichTarget(self.__target)
                if newTarget is not None:
                    self.__target = newTarget
                self.__moduleInstances[modName] = mod

                # Override the module's local socket module
                # to be the SOCKS one.
                if self.__config['_socks1type'] != '':
                    mod._updateSocket(socket)

                # Set up event output filters if requested
                if self.__config['__outputfilter']:
                    mod.setOutputFilter(self.__config['__outputfilter'])

                self.__sf.status(modName + " module loaded.")

            # Register listener modules and then start all modules sequentially
            for module in list(self.__moduleInstances.values()):
                # Register the target with the module
                module.setTarget(self.__target)

                for listenerModule in list(self.__moduleInstances.values()):
                    # Careful not to register twice or you will get duplicate events
                    if listenerModule in module._listenerModules:
                        continue
                    # Note the absence of a check for whether a module can register
                    # to itself. That is intentional because some modules will
                    # act on their own notifications (e.g. sfp_dns)!
                    if listenerModule.watchedEvents() is not None:
                        module.registerListener(listenerModule)

            # Now we are ready to roll..
            self.__setStatus("RUNNING")

            # Create a pseudo module for the root event to originate from
            psMod = SpiderFootPlugin()
            psMod.__name__ = "SpiderFoot UI"
            psMod.setTarget(self.__target)
            psMod.setDbh(self.__dbh)
            psMod.clearListeners()
            # Every loaded module that watches events listens to the pseudo
            # module, so they all receive the initial events sent below.
            for mod in list(self.__moduleInstances.values()):
                if mod.watchedEvents() is not None:
                    psMod.registerListener(mod)

            # Create the "ROOT" event which un-triggered modules will link events to
            rootEvent = SpiderFootEvent("ROOT", self.__targetValue, "", None)
            psMod.notifyListeners(rootEvent)
            # The first real event describes the target itself and has the
            # ROOT event as its source.
            firstEvent = SpiderFootEvent(self.__targetType, self.__targetValue,
                                         "SpiderFoot UI", rootEvent)
            psMod.notifyListeners(firstEvent)

            # Special case.. check if an INTERNET_NAME is also a domain
            if self.__targetType == 'INTERNET_NAME':
                if self.__sf.isDomain(self.__targetValue, self.__config['_internettlds']):
                    firstEvent = SpiderFootEvent('DOMAIN_NAME', self.__targetValue,
                                                 "SpiderFoot UI", rootEvent)
                    psMod.notifyListeners(firstEvent)

            # If in interactive mode, loop through this shared global variable
            # waiting for inputs, and process them until my status is set to
            # FINISHED.

            # Check in case the user requested to stop the scan between modules
            # initializing
            for module in list(self.__moduleInstances.values()):
                if module.checkForStop():
                    self.__setStatus('ABORTING')
                    aborted = True
                    break

            if aborted:
                self.__sf.status(f"Scan [{self.__scanId}] aborted.")
                self.__setStatus("ABORTED", None, time.time() * 1000)
            else:
                self.__sf.status(f"Scan [{self.__scanId}] completed.")
                self.__setStatus("FINISHED", None, time.time() * 1000)
        except BaseException as e:
            # Catch-all boundary for the whole scan: the failure is logged
            # with its full traceback and recorded in the scan status rather
            # than propagated to the caller.
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self.__sf.error(f"Unhandled exception ({e.__class__.__name__}) encountered during scan."
                            + "Please report this as a bug: "
                            + repr(traceback.format_exception(exc_type, exc_value, exc_traceback)), False)
            self.__sf.status(f"Scan [{self.__scanId}] failed: {e}")
            self.__setStatus("ERROR-FAILED", None, time.time() * 1000)

        # Reached on success, abort and failure alike, since every exception
        # is handled above.
        self.__dbh.close()
Exemple #18
0
    def handleEvent(self, event):
        """Look up the event's data via self.query() and report the findings.

        Hosts and IP addresses are queried directly; NETBLOCK_* events are
        expanded and every contained address is queried. Results that list
        detected URLs are reported as MALICIOUS_* events, and any sibling
        domains or subdomains in a result are reported as hostname events.

        Args:
            event: the event to process; its eventType, module and data
                attributes are read.

        Returns:
            None
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        if self.errorState:
            return None

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)

        if self.opts['api_key'] == "":
            self.sf.error(
                "You enabled sfp_virustotal but did not set an API key!",
                False)
            self.errorState = True
            return None

        # Don't look up stuff twice
        if eventData in self.results:
            self.sf.debug("Skipping " + eventData + " as already mapped.")
            return None
        self.results[eventData] = True

        if eventName.startswith("AFFILIATE") \
                and not self.opts['checkaffiliates']:
            return None

        if eventName == 'CO_HOSTED_SITE' and not self.opts['checkcohosts']:
            return None

        # Netblocks are only expanded when the matching option is enabled
        # and the prefix is not shorter than the configured limit.
        netblockOpts = {
            'NETBLOCK_OWNER': ('netblocklookup', 'maxnetblock'),
            'NETBLOCK_MEMBER': ('subnetlookup', 'maxsubnet'),
        }
        if eventName in netblockOpts:
            enabledOpt, maxOpt = netblockOpts[eventName]
            if not self.opts[enabledOpt]:
                return None
            prefixlen = IPNetwork(eventData).prefixlen
            if prefixlen < self.opts[maxOpt]:
                self.sf.debug("Network size bigger than permitted: " +
                              str(prefixlen) + " > " +
                              str(self.opts[maxOpt]))
                return None

        # Decide once which event type (and VirusTotal URL section) a hit
        # maps to. None means the incoming event type has no MALICIOUS_*
        # counterpart, so a hit cannot be reported.
        if eventName == "IP_ADDRESS" or eventName.startswith("NETBLOCK_"):
            malicious = ("MALICIOUS_IPADDR", "ip-address")
        else:
            malicious = {
                "AFFILIATE_IPADDR":
                    ("MALICIOUS_AFFILIATE_IPADDR", "ip-address"),
                "INTERNET_NAME":
                    ("MALICIOUS_INTERNET_NAME", "domain"),
                "AFFILIATE_INTERNET_NAME":
                    ("MALICIOUS_AFFILIATE_INTERNET_NAME", "domain"),
                "CO_HOSTED_SITE":
                    ("MALICIOUS_COHOST", "domain"),
            }.get(eventName)

        qrylist = list()
        if eventName.startswith("NETBLOCK_"):
            for ipaddr in IPNetwork(eventData):
                qrylist.append(str(ipaddr))
                self.results[str(ipaddr)] = True
        else:
            qrylist.append(eventData)

        for addr in qrylist:
            if self.checkForStop():
                return None

            info = self.query(addr)
            if info is None:
                continue

            if info.get('detected_urls'):
                self.sf.info("Found VirusTotal URL data for " + addr)
                if malicious is None:
                    self.sf.debug("No malicious event type defined for " +
                                  eventName + ", not reporting " + addr)
                else:
                    evt, infotype = malicious
                    infourl = "<SFURL>https://www.virustotal.com/en/" + \
                              infotype + "/" + addr + "/information/</SFURL>"

                    # Notify other modules of what you've found
                    e = SpiderFootEvent(
                        evt, "VirusTotal [" + addr + "]\n" + infourl,
                        self.__name__, event)
                    self.notifyListeners(e)

            # Siblings that match the target are additional hosts within the
            # target; all other siblings are treated as affiliates.
            if eventName in ("IP_ADDRESS", "INTERNET_NAME"):
                for s in info.get('domain_siblings') or []:
                    if s in self.results:
                        continue

                    if not self.getTarget().matches(s):
                        e = SpiderFootEvent("AFFILIATE_INTERNET_NAME", s,
                                            self.__name__, event)
                        self.notifyListeners(e)
                        continue

                    # Only a failed verification downgrades the host to
                    # unresolved; with verification off it is reported as-is.
                    if self.opts['verify'] and not self.sf.resolveHost(s):
                        hostType = "INTERNET_NAME_UNRESOLVED"
                    else:
                        hostType = "INTERNET_NAME"
                    e = SpiderFootEvent(hostType, s, self.__name__, event)
                    self.notifyListeners(e)

                    if self.sf.isDomain(s, self.opts['_internettlds']):
                        e = SpiderFootEvent("DOMAIN_NAME", s, self.__name__,
                                            event)
                        self.notifyListeners(e)

            if eventName == "INTERNET_NAME":
                for n in info.get('subdomains') or []:
                    if n in self.results:
                        continue

                    # Same verification rule as for siblings above.
                    if self.opts['verify'] and not self.sf.resolveHost(n):
                        hostType = "INTERNET_NAME_UNRESOLVED"
                    else:
                        hostType = "INTERNET_NAME"
                    e = SpiderFootEvent(hostType, n, self.__name__, event)
                    self.notifyListeners(e)

                    if self.sf.isDomain(n, self.opts['_internettlds']):
                        e = SpiderFootEvent("DOMAIN_NAME", n, self.__name__,
                                            event)
                        self.notifyListeners(e)

        return None
Exemple #19
0
    def handleEvent(self, event):
        """Search GitHub for repositories related to the event's data.

        A search keyword is derived from DOMAIN_NAME, USERNAME and
        SOCIAL_MEDIA events. Repositories matching the keyword, and the
        repositories of users matching the keyword, are reported as
        PUBLIC_CODE_REPO events. Other event types are ignored.

        Args:
            event: the event to process; its eventType and data attributes
                are read.

        Returns:
            None
        """
        eventName = event.eventType
        eventData = event.data

        if eventData in self.results:
            self.sf.debug("Already did a search for " + eventData +
                          ", skipping.")
            return None
        self.results.append(eventData)

        name = None
        if eventName == "DOMAIN_NAME":
            name = self.sf.domainKeyword(eventData, self.opts['_internettlds'])
        elif eventName == "USERNAME":
            name = eventData
        elif eventName == "SOCIAL_MEDIA":
            # Data looks like "<network>: <value>"; anything else has no
            # usable keyword.
            parts = eventData.split(": ")
            if len(parts) > 1:
                name = parts[1]

        if not name:
            self.sf.debug("No search keyword could be derived from " +
                          eventName + " event, skipping.")
            return None

        self.sf.debug("Looking at " + name)

        def fetchJson(url):
            # Fetch url and return the decoded JSON, or None (after logging)
            # when the fetch failed or the body could not be decoded.
            res = self.sf.fetchUrl(url,
                                   timeout=self.opts['_fetchtimeout'],
                                   useragent="SpiderFoot")
            if res['content'] is None:
                self.sf.error("Unable to fetch " + url, False)
                return None

            try:
                data = json.loads(res['content'])
            except (TypeError, ValueError):
                data = None

            if data is None:
                self.sf.error(
                    "Unable to process empty response from Github for: " +
                    name, False)
            return data

        def searchItems(url):
            # Return the non-empty 'items' list of a search response, or
            # None when the search failed or found nothing.
            ret = fetchJson(url)
            if ret is None:
                return None
            items = ret.get('items') if isinstance(ret, dict) else None
            if not items:
                self.sf.debug("No Github information for " + name)
                return None
            return items

        def reportRepos(repos):
            # Emit a PUBLIC_CODE_REPO event for every usable repo record.
            for repo in repos:
                if not isinstance(repo, dict):
                    continue
                repo_info = self.buildRepoInfo(repo)
                if repo_info is None:
                    continue
                if self.opts['namesonly'] \
                        and name not in (repo.get('name') or ''):
                    continue

                evt = SpiderFootEvent("PUBLIC_CODE_REPO", repo_info,
                                      self.__name__, event)
                self.notifyListeners(evt)

        # Get all the repositories based on direct matches with the
        # name identified
        repos = searchItems(
            "https://api.github.com/search/repositories?q=" + name)
        if repos:
            reportRepos(repos)

        # Now look for users matching the name found
        users = searchItems("https://api.github.com/search/users?q=" + name)
        if not users:
            return None

        # For each user matching the name, get their repos
        for user in users:
            repos_url = user.get('repos_url') if isinstance(user, dict) \
                else None
            if repos_url is None:
                self.sf.debug(
                    "Incomplete Github information found (repos_url).")
                continue

            repret = fetchJson(repos_url)
            # An error payload is a JSON object rather than a list of repos;
            # iterating it would hand bare strings to buildRepoInfo().
            if not isinstance(repret, list):
                continue

            reportRepos(repret)

        return None
Exemple #20
0
    def handleEvent(self, event):
        """Check the event's data against every enabled malicious-list check.

        Each entry of the module-level ``malchecks`` table whose option is
        enabled is consulted through self.lookupItem(); a non-None result is
        reported as the MALICIOUS_* event type matching the incoming event
        type. Event types without such a mapping are ignored.

        Args:
            event: the event to process; its eventType, module and data
                attributes are read.

        Returns:
            None
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)

        if eventData in self.results:
            self.sf.debug("Skipping " + eventData + ", already checked.")
            return None
        self.results[eventData] = True

        # Event types that are only processed when their option is enabled.
        gatingOpts = {
            'CO_HOSTED_SITE': 'checkcohosts',
            'AFFILIATE_IPADDR': 'checkaffiliates',
            'NETBLOCK_OWNER': 'checknetblocks',
            'NETBLOCK_MEMBER': 'checksubnets',
        }
        gate = gatingOpts.get(eventName)
        if gate is not None and not self.opts.get(gate, False):
            return None

        # Incoming event type -> (lookup type id, event type to emit).
        # Resolved once here because it does not depend on the check.
        eventTypes = {
            'IP_ADDRESS': ('ip', 'MALICIOUS_IPADDR'),
            'AFFILIATE_IPADDR': ('ip', 'MALICIOUS_AFFILIATE_IPADDR'),
            'BGP_AS_OWNER': ('asn', 'MALICIOUS_ASN'),
            'BGP_AS_MEMBER': ('asn', 'MALICIOUS_ASN'),
            'INTERNET_NAME': ('domain', 'MALICIOUS_INTERNET_NAME'),
            'AFFILIATE_INTERNET_NAME':
                ('domain', 'MALICIOUS_AFFILIATE_INTERNET_NAME'),
            'CO_HOSTED_SITE': ('domain', 'MALICIOUS_COHOST'),
            'NETBLOCK_OWNER': ('netblock', 'MALICIOUS_NETBLOCK'),
            'NETBLOCK_MEMBER': ('netblock', 'MALICIOUS_SUBNET'),
        }
        if eventName not in eventTypes:
            self.sf.debug("Unsupported event type " + eventName +
                          ", skipping.")
            return None
        typeId, evtType = eventTypes[eventName]

        for check in list(malchecks.keys()):
            cid = malchecks[check]['id']
            # If the module is enabled..
            if not self.opts[cid]:
                continue

            url = self.lookupItem(cid, typeId, eventData)
            if self.checkForStop():
                return None

            # Notify other modules of what you've found
            if url is not None:
                text = check + " [" + eventData + "]\n" + \
                    "<SFURL>" + url + "</SFURL>"
                evt = SpiderFootEvent(evtType, text, self.__name__, event)
                self.notifyListeners(evt)

        return None
Exemple #21
0
    def handleEvent(self, event):
        """Derive web server banner/technology events from HTTP headers.

        The event data is expected to be a JSON object of HTTP headers and
        the source event's data the URL they came from. A 'server' header is
        reported as WEBSERVER_BANNER; the technology is taken from
        'x-powered-by' when present, otherwise guessed from cookies, the
        'x-aspnet-version' header and finally the URL's file extension.

        Args:
            event: the event to process; its eventType, module, data and
                sourceEvent attributes are read.

        Returns:
            None
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data
        parentEvent = event.sourceEvent
        eventSource = event.sourceEvent.data

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)
        if eventSource in self.results:
            return None
        self.results[eventSource] = True

        if not self.getTarget().matches(self.sf.urlFQDN(eventSource)):
            self.sf.debug(
                "Not collecting web server information for external sites.")
            return None

        try:
            jdata = json.loads(eventData)
        except (TypeError, ValueError):
            self.sf.error(
                "Received HTTP headers from another module in an unexpected format.",
                False)
            return None

        if jdata is None:
            return None

        # Valid JSON that is not an object (e.g. a bare string) cannot be
        # treated as a header mapping.
        if not isinstance(jdata, dict):
            self.sf.error(
                "Received HTTP headers from another module in an unexpected format.",
                False)
            return None

        # Could apply some smarts here, for instance looking for certain
        # banners and therefore classifying them further (type and version,
        # possibly OS. This could also trigger additional tests, such as 404s
        # and other errors to see what the header looks like.
        if 'server' in jdata:
            evt = SpiderFootEvent("WEBSERVER_BANNER", jdata['server'],
                                  self.__name__, parentEvent)
            self.notifyListeners(evt)

            self.sf.info("Found web server: " + jdata['server'] + " (" +
                         eventSource + ")")

        if 'x-powered-by' in jdata:
            evt = SpiderFootEvent("WEBSERVER_TECHNOLOGY",
                                  jdata['x-powered-by'], self.__name__,
                                  parentEvent)
            self.notifyListeners(evt)
            return None

        # Later checks deliberately override earlier ones.
        tech = None
        cookies = jdata.get('set-cookie') or ''
        if 'PHPSESS' in cookies:
            tech = "PHP"

        if 'JSESSIONID' in cookies:
            tech = "Java/JSP"

        if 'ASP.NET' in cookies:
            tech = "ASP.NET"

        if 'x-aspnet-version' in jdata:
            tech = "ASP.NET"

        # Fall back to the URL's extension only when the headers gave no
        # hint. Requiring a header hit first made this check pointless.
        if tech is None and '.jsp' in eventSource:
            tech = "Java/JSP"

        if tech is None and '.php' in eventSource:
            tech = "PHP"

        if tech is not None:
            evt = SpiderFootEvent("WEBSERVER_TECHNOLOGY", tech, self.__name__,
                                  parentEvent)
            self.notifyListeners(evt)

        return None
Exemple #22
0
 def getIssuer(self, cert, sevt):
     """Notify listeners of the issuer of a certificate.

     The issuer's text form is encoded with 'raw_unicode_escape' and sent
     as an SSL_CERTIFICATE_ISSUER event whose source event is sevt.
     """
     issuer_name = cert.get_issuer()
     encoded_issuer = issuer_name.as_text().encode('raw_unicode_escape')
     issuer_event = SpiderFootEvent(
         "SSL_CERTIFICATE_ISSUER",
         encoded_issuer,
         self.__name__,
         sevt,
     )
     self.notifyListeners(issuer_event)
    def handleEvent(self, event):
        """Report GitHub profile details and public repositories for an event.

        SOCIAL_MEDIA events that point at a GitHub profile yield the
        profile's full name (RAW_RIR_DATA) and location (GEOINFO).
        DOMAIN_NAME and USERNAME events are turned into a search keyword;
        repositories matching it, and repositories of users matching it, are
        reported as PUBLIC_CODE_REPO events. Other event types are ignored.

        Args:
            event: the event to process; its eventType and data attributes
                are read.

        Returns:
            None
        """
        eventName = event.eventType
        eventData = event.data

        if eventData in self.results:
            self.sf.debug(f"Already did a search for {eventData}, skipping.")
            return None

        self.results[eventData] = True

        # Extract name and location from profile
        if eventName == "SOCIAL_MEDIA":
            try:
                network = eventData.split(": ")[0]
                url = eventData.split(": ")[1].replace("<SFURL>", "").replace("</SFURL>", "")
            except (AttributeError, IndexError) as e:
                self.sf.error(f"Unable to parse SOCIAL_MEDIA: {eventData} ({e})", False)
                return None

            if network != "Github":
                self.sf.debug(f"Skipping social network profile, {url}, as not a GitHub profile")
                return None

            # The username is the last path component; ignore a trailing
            # slash so ".../user/" still yields "user".
            username = url.rstrip("/").split("/")[-1]
            if not username:
                self.sf.debug(f"Couldn't get a username out of {url}")
                return None

            res = self.sf.fetchUrl(
                f"https://api.github.com/users/{username}",
                timeout=self.opts['_fetchtimeout'],
                useragent=self.opts['_useragent']
            )

            if res['content'] is None:
                return None

            try:
                json_data = json.loads(res['content'])
            except (TypeError, ValueError) as e:
                self.sf.debug(f"Error processing JSON response: {e}")
                return None

            if not isinstance(json_data, dict) or not json_data.get('login'):
                self.sf.debug(f"{username} is not a valid GitHub profile")
                return None

            full_name = json_data.get('name')

            if not full_name:
                self.sf.debug(f"{username} is not a valid GitHub profile")
                return None

            # f-string: the name itself must be reported, not the literal
            # "{full_name}" placeholder.
            e = SpiderFootEvent("RAW_RIR_DATA", f"Possible full name: {full_name}", self.__name__, event)
            self.notifyListeners(e)

            location = json_data.get('location')

            if location is None:
                return None

            if len(location) < 3 or len(location) > 100:
                self.sf.debug(f"Skipping likely invalid location: {location}")
                return None

            e = SpiderFootEvent("GEOINFO", location, self.__name__, event)
            self.notifyListeners(e)

            return None

        if eventName == "DOMAIN_NAME":
            username = self.sf.domainKeyword(eventData, self.opts['_internettlds'])
            if not username:
                return None
        elif eventName == "USERNAME":
            username = eventData
        else:
            # No keyword can be derived from any other event type.
            self.sf.debug(f"Unsupported event type {eventName}, skipping.")
            return None

        self.sf.debug(f"Looking at {username}")

        # Get all the repositories based on direct matches with the
        # name identified
        url = f"https://api.github.com/search/repositories?q={username}"
        res = self.sf.fetchUrl(
            url,
            timeout=self.opts['_fetchtimeout'],
            useragent=self.opts['_useragent']
        )

        ret = None
        if res['content'] is None:
            self.sf.error(f"Unable to fetch {url}", False)
        else:
            try:
                ret = json.loads(res['content'])
            except (TypeError, ValueError) as e:
                self.sf.debug(f"Error processing JSON response from GitHub: {e}")

            if ret is None:
                self.sf.error(f"Unable to process empty response from Github for: {username}", False)

        if ret is not None:
            # Error payloads carry no 'items'; treat them like an empty
            # result instead of raising KeyError.
            repos = ret.get('items') if isinstance(ret, dict) else None
            if not repos:
                self.sf.debug(f"No Github information for {username}")
            else:
                for repo in repos:
                    repo_info = self.buildRepoInfo(repo)
                    if repo_info is None:
                        continue
                    if self.opts['namesonly'] and username != repo.get('name'):
                        continue

                    evt = SpiderFootEvent("PUBLIC_CODE_REPO", repo_info, self.__name__, event)
                    self.notifyListeners(evt)

        # Now look for users matching the name found
        url = f"https://api.github.com/search/users?q={username}"
        res = self.sf.fetchUrl(
            url,
            timeout=self.opts['_fetchtimeout'],
            useragent=self.opts['_useragent']
        )

        if res['content'] is None:
            self.sf.error(f"Unable to fetch {url}", False)
            return None

        try:
            ret = json.loads(res['content'])
        except (TypeError, ValueError):
            self.sf.error(f"Unable to process invalid response from Github for: {username}", False)
            return None

        if ret is None:
            self.sf.error(f"Unable to process empty response from Github for: {username}", False)
            return None

        users = ret.get('items') if isinstance(ret, dict) else None
        if not users:
            self.sf.debug("No Github information for " + username)
            return None

        # For each user matching the username, get their repos
        for user in users:
            repos_url = user.get('repos_url') if isinstance(user, dict) else None
            if repos_url is None:
                self.sf.debug("Incomplete Github information found (repos_url).")
                continue

            res = self.sf.fetchUrl(repos_url, timeout=self.opts['_fetchtimeout'],
                                   useragent=self.opts['_useragent'])

            if res['content'] is None:
                self.sf.error(f"Unable to fetch {repos_url}", False)
                continue

            try:
                repret = json.loads(res['content'])
            except (TypeError, ValueError) as e:
                self.sf.error(f"Invalid JSON returned from Github: {e}", False)
                continue

            if repret is None:
                self.sf.error(f"Unable to process empty response from Github for: {username}", False)
                continue

            # A repo listing is a JSON array; anything else (e.g. an error
            # object) has no repos to report.
            if not isinstance(repret, list):
                self.sf.debug("Encountered an unexpected or empty response from Github.")
                continue

            for repo in repret:
                if not isinstance(repo, dict):
                    self.sf.debug("Encountered an unexpected or empty response from Github.")
                    continue

                repo_info = self.buildRepoInfo(repo)
                if repo_info is None:
                    continue
                if self.opts['namesonly'] and repo.get('name') != username:
                    continue
                if eventName == "USERNAME" and "/" + username + "/" not in repo.get('html_url', ''):
                    continue

                evt = SpiderFootEvent("PUBLIC_CODE_REPO", repo_info,
                                      self.__name__, event)
                self.notifyListeners(evt)

        return None
Exemple #24
0
    def handleEvent(self, event):
        """Check the event's IP address against each configured DNS blacklist.

        For every domain in self.checks the reversed address is resolved
        under that domain. A successful resolution means the address is
        listed: the description is either the single string configured for
        the domain, or the entry of the per-domain table keyed by a returned
        value. Listed addresses are reported as BLACKLISTED_IPADDR, or
        BLACKLISTED_AFFILIATE_IPADDR for AFFILIATE_IPADDR events.

        Args:
            event: the event to process; its eventType, module and data
                attributes are read.

        Returns:
            None
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data
        parentEvent = event

        sf.debug("Received event, " + eventName + ", from " + srcModuleName)

        # 'in' works on every Python version; dict.has_key() does not.
        if eventData in self.results:
            return None

        self.results[eventData] = True

        if eventName == "AFFILIATE_IPADDR":
            evtType = 'BLACKLISTED_AFFILIATE_IPADDR'
        else:
            evtType = 'BLACKLISTED_IPADDR'

        for domain in self.checks:
            # Pre-set so the handler below can always build its message,
            # even when reverseAddr() is what failed.
            lookup = None
            try:
                lookup = self.reverseAddr(eventData) + "." + domain
                sf.debug("Checking Blacklist: " + lookup)
                # Returns a (hostname, aliaslist, ipaddrlist) tuple.
                addrs = socket.gethostbyname_ex(lookup)
                sf.debug("Addresses returned: " + str(addrs))

                meaning = self.checks[domain]
                text = None
                if isinstance(meaning, str):
                    # One description covers every return code.
                    text = meaning
                else:
                    # Per-code descriptions: use the first returned value
                    # that the table for THIS domain knows about.
                    for addr in addrs:
                        codes = addr if isinstance(addr, list) else [addr]
                        for code in codes:
                            k = str(code)
                            if k in meaning:
                                text = meaning[k]
                                break
                            sf.debug("Return code not found in list: " + k)
                        if text is not None:
                            break

                if text is not None:
                    evt = SpiderFootEvent(evtType, text, self.__name__,
                                          parentEvent)
                    self.notifyListeners(evt)
            except Exception as e:
                # Failure to resolve is the normal "not listed" outcome.
                sf.debug("Unable to resolve " + eventData + " / " +
                         str(lookup) + ": " + str(e))

        return None
Exemple #25
0
    def handleEvent(self, event):
        """Look up the event's data via self.queryIP() and report threat info.

        Every returned record flagged as seen (and not older than the
        'age_limit_days' option, when that is set) is processed: for
        IP_ADDRESS events its metadata is reported as GEOINFO, BGP_AS_MEMBER,
        COMPANY_NAME, OPERATING_SYSTEM and RAW_RIR_DATA events, and a
        classified record is reported as a MALICIOUS_* event.

        Args:
            event: the event to process; its eventType, module and data
                attributes are read.

        Returns:
            None
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        if self.errorState:
            return None

        self.sf.debug(f"Received event, {eventName}, from {srcModuleName}")

        if self.opts['api_key'] == "":
            self.sf.error(
                "You enabled sfp_greynoise but did not set an API key!", False)
            self.errorState = True
            return None

        # Don't look up stuff twice
        if eventData in self.results:
            self.sf.debug(f"Skipping {eventData}, already checked.")
            return None
        self.results[eventData] = True

        # Netblocks are only looked up when the matching option is enabled
        # and the prefix is not shorter than the configured limit.
        netblockOpts = {
            'NETBLOCK_OWNER': ('netblocklookup', 'maxnetblock'),
            'NETBLOCK_MEMBER': ('subnetlookup', 'maxsubnet'),
        }
        if eventName in netblockOpts:
            enabledOpt, maxOpt = netblockOpts[eventName]
            if not self.opts[enabledOpt]:
                return None
            prefixlen = IPNetwork(eventData).prefixlen
            if prefixlen < self.opts[maxOpt]:
                self.sf.debug("Network size bigger than permitted: " +
                              str(prefixlen) + " > " +
                              str(self.opts[maxOpt]))
                return None

        if eventName == 'IP_ADDRESS' or eventName.startswith('NETBLOCK_'):
            evtType = 'MALICIOUS_IPADDR'
        elif eventName == "AFFILIATE_IPADDR":
            evtType = 'MALICIOUS_AFFILIATE_IPADDR'
        else:
            # Nothing below could be reported for any other event type.
            self.sf.debug(f"Unsupported event type {eventName}, skipping.")
            return None

        ret = self.queryIP(eventData)

        if not ret:
            return None

        if "data" not in ret:
            return None

        age_limit_days = self.opts['age_limit_days']

        for rec in ret["data"] or []:
            if not rec.get("seen", None):
                continue

            self.sf.debug("Found threat info in Greynoise")

            if age_limit_days > 0:
                lastseen = rec.get("last_seen", "1970-01-01")
                try:
                    lastseen_dt = datetime.strptime(lastseen, '%Y-%m-%d')
                    lastseen_ts = int(time.mktime(lastseen_dt.timetuple()))
                except (TypeError, ValueError, OverflowError):
                    self.sf.debug(
                        f"Record has an unusable last_seen date ({lastseen}), skipping.")
                    continue

                age_limit_ts = int(time.time()) - (86400 * age_limit_days)
                if lastseen_ts < age_limit_ts:
                    # Skip only this record; later ones may be recent.
                    self.sf.debug("Record found but too old, skipping.")
                    continue

            # Only report meta data about the target, not affiliates
            if rec.get("metadata") and eventName == "IP_ADDRESS":
                met = rec.get("metadata")

                country = met.get("country", "unknown")
                if country and country != "unknown":
                    loc = ""
                    if met.get("city"):
                        loc = met.get("city") + ", "
                    loc += country
                    e = SpiderFootEvent("GEOINFO", loc, self.__name__,
                                        event)
                    self.notifyListeners(e)

                asn = met.get("asn", "unknown")
                if asn and asn != "unknown":
                    e = SpiderFootEvent("BGP_AS_MEMBER",
                                        asn.replace("AS", ""),
                                        self.__name__, event)
                    self.notifyListeners(e)

                organization = met.get("organization", "unknown")
                if organization and organization != "unknown":
                    e = SpiderFootEvent("COMPANY_NAME", organization,
                                        self.__name__, event)
                    self.notifyListeners(e)

                opsys = met.get("os", "unknown")
                if opsys and opsys != "unknown":
                    e = SpiderFootEvent("OPERATING_SYSTEM", opsys,
                                        self.__name__, event)
                    self.notifyListeners(e)

                e = SpiderFootEvent("RAW_RIR_DATA", str(rec),
                                    self.__name__, event)
                self.notifyListeners(e)

            if rec.get("classification"):
                descr = "Greynoise [" + eventData + "]\n - Classification: " + rec.get(
                    "classification")
                if rec.get("tags"):
                    descr += ", Tags: " + ", ".join(rec.get("tags"))
                else:
                    descr += "\n - " + "Raw data: " + str(
                        rec.get("raw_data"))
                descr += "\n<SFURL>https://viz.greynoise.io/ip/" + eventData + "</SFURL>"
                e = SpiderFootEvent(evtType, descr, self.__name__,
                                    event)
                self.notifyListeners(e)

        return None
Exemple #26
0
    def handleEvent(self, event):
        """Classify a spidered page and flag externally hosted Javascript.

        Events whose module name does not contain "sfp_spider" are ignored,
        as are pages whose host does not match the scan target and pages
        that already have an entry in ``self.results``.  For every group in
        the module-level ``regexps`` table that matches the content, an
        event named after the group is raised with the page URL as data;
        when nothing matches a ``URL_STATIC`` event is raised instead.
        Finally, every ``<script src=...>`` URL containing "://" that is not
        on the target raises ``PROVIDER_JAVASCRIPT``.

        :param event: event carrying the page content in ``data`` and the
            page URL in ``actualSource``.
        :return: always None.
        """
        srcModuleName = event.module

        # Only the spidering module is trusted to provide the source URL
        # alongside the content.
        if "sfp_spider" not in srcModuleName:
            self.sf.debug("Ignoring web content from " + srcModuleName)
            return None

        eventName = event.eventType
        pageContent = event.data
        pageUrl = event.actualSource

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)

        # Pages hosted off the target are not described.
        if not self.getTarget().matches(self.sf.urlFQDN(pageUrl)):
            self.sf.debug("Not gathering page info for external site " +
                          pageUrl)
            return None

        # Each page is classified once only.
        if pageUrl in self.results:
            self.sf.debug(
                "Already checked this page for a page type, skipping.")
            return None
        self.results[pageUrl] = list()

        # Try every configured expression; a group is reported at most once.
        for pageType in regexps:
            if pageType in self.results[pageUrl]:
                continue

            for expression in regexps[pageType]:
                compiled = re.compile(expression, re.IGNORECASE)
                if not re.findall(compiled, pageContent):
                    continue
                if pageType in self.results[pageUrl]:
                    continue

                self.sf.info("Matched " + pageType + " in content from " +
                             pageUrl)
                self.results[pageUrl].append(pageType)
                self.notifyListeners(
                    SpiderFootEvent(pageType, pageUrl, self.__name__, event))

        # Nothing matched at all: report the page as static.
        if not self.results[pageUrl]:
            self.sf.info("Treating " + pageUrl + " as URL_STATIC")
            self.notifyListeners(
                SpiderFootEvent("URL_STATIC", pageUrl, self.__name__, event))

        # Look for script tags that load Javascript from another host.
        scriptRx = re.compile("<script.*src=[\'\"]?([^\'\">]*)", re.IGNORECASE)
        for scriptUrl in re.findall(scriptRx, pageContent):
            if '://' not in scriptUrl:
                continue
            if self.getTarget().matches(self.sf.urlFQDN(scriptUrl)):
                continue

            self.sf.debug("Externally hosted Javascript found at: " +
                          scriptUrl)
            self.notifyListeners(
                SpiderFootEvent("PROVIDER_JAVASCRIPT", scriptUrl,
                                self.__name__, event))

        return None
Exemple #27
0
    def handleEvent(self, event):
        """Query Phishstats for an IP address, or for each address of a netblock.

        Data that was already seen (tracked in ``self.results``) is skipped.
        ``NETBLOCK_OWNER`` / ``NETBLOCK_MEMBER`` events are only expanded when
        the matching ``netblocklookup`` / ``subnetlookup`` option is set and
        the prefix length is not below ``maxnetblock`` / ``maxsubnet``.
        ``AFFILIATE_IPADDR`` events require the ``checkaffiliates`` option.

        For every address the service reports on, ``RAW_RIR_DATA`` and a
        ``MALICIOUS_IPADDR`` (or ``MALICIOUS_AFFILIATE_IPADDR``) event are
        raised; for netblocks an ``IP_ADDRESS`` event is raised first and
        used as the parent of the others.

        :param event: the incoming event.
        :return: always None.
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        if self.errorState:
            return None

        self.sf.debug(f"Received event, {eventName}, from {srcModuleName}")

        # Don't look up stuff twice
        if eventData in self.results:
            self.sf.debug(f"Skipping {eventData}, already checked.")
            return None

        self.results[eventData] = True

        isNetblock = eventName.startswith("NETBLOCK_")

        # Per event type: (option enabling the lookup, option holding the
        # smallest acceptable prefix length).
        netblockOpts = {
            'NETBLOCK_OWNER': ('netblocklookup', 'maxnetblock'),
            'NETBLOCK_MEMBER': ('subnetlookup', 'maxsubnet'),
        }
        if eventName in netblockOpts:
            enabledOpt, maxSizeOpt = netblockOpts[eventName]
            if not self.opts[enabledOpt]:
                return None

            # A smaller prefix length means a larger network.
            prefixlen = IPNetwork(eventData).prefixlen
            if prefixlen < self.opts[maxSizeOpt]:
                self.sf.debug("Network size bigger than permitted: " +
                              str(prefixlen) + " < " +
                              str(self.opts[maxSizeOpt]))
                return None

        if isNetblock:
            qrylist = [str(ipaddr) for ipaddr in IPNetwork(eventData)]
            # Mark every member as seen so later IP events are not re-queried.
            for addr in qrylist:
                self.results[addr] = True
        else:
            # If user has enabled affiliate checking
            if eventName == "AFFILIATE_IPADDR" and not self.opts[
                    'checkaffiliates']:
                return None
            qrylist = [eventData]

        for addr in qrylist:

            if self.checkForStop():
                return None

            data = self.queryIPAddress(addr)

            # No usable response: give up on the remaining addresses too.
            if data is None:
                break

            try:
                maliciousIP = data[0].get('ip')
            except (IndexError, KeyError, TypeError, AttributeError):
                # An empty or unexpectedly shaped result means there is no
                # record for this address.
                continue

            if maliciousIP is None:
                continue

            if addr != maliciousIP:
                self.sf.error(
                    "Reported address doesn't match requested, skipping",
                    False)
                continue

            # Data is reported about the IP Address. For netblocks the
            # individual address is announced first and becomes the parent
            # of everything reported about it.
            parentEvt = event
            if isNetblock:
                parentEvt = SpiderFootEvent("IP_ADDRESS", addr, self.__name__,
                                            event)
                self.notifyListeners(parentEvt)

            evt = SpiderFootEvent("RAW_RIR_DATA", str(data), self.__name__,
                                  parentEvt)
            self.notifyListeners(evt)

            maliciousIPDesc = f"Phishstats [{maliciousIP}]\n"

            # Report each malicious description only once.
            maliciousIPDescHash = self.sf.hashstring(maliciousIPDesc)
            if maliciousIPDescHash in self.results:
                continue
            self.results[maliciousIPDescHash] = True

            if eventName.startswith("AFFILIATE_"):
                evtType = "MALICIOUS_AFFILIATE_IPADDR"
            else:
                evtType = "MALICIOUS_IPADDR"

            evt = SpiderFootEvent(evtType, maliciousIPDesc, self.__name__,
                                  parentEvt)
            self.notifyListeners(evt)

        return None
Exemple #28
0
 def sendEvent(self, source, result):
     """Log a brute-forced hostname and raise it as an INTERNET_NAME event.

     :param source: event passed to SpiderFootEvent as the source event.
     :param result: the hostname that was found.
     """
     self.sf.info("Found a brute-forced host: " + result)
     # Report the host to the listening modules.
     self.notifyListeners(
         SpiderFootEvent("INTERNET_NAME", result, self.__name__, source))
Exemple #29
0
    def handleEvent(self, event):
        """Query CIRCL.LU passive SSL / passive DNS for the event data.

        ``IP_ADDRESS`` and ``NETBLOCK_OWNER`` events trigger a passive SSL
        ("PSSL") query, unless the data is a network with a mask below 23.
        Each IP in the response that differs from the event data raises
        ``IP_ADDRESS``, and each certificate subject containing a CN raises
        ``SSL_CERTIFICATE_ISSUED``.

        ``IP_ADDRESS``, ``INTERNET_NAME`` and ``DOMAIN_NAME`` events trigger
        a passive DNS ("PDNS") query.  Records that point at the event data,
        whose name is not the target, raise ``CO_HOSTED_SITE`` subject to
        the ``age_limit_days``, ``verify`` and ``cohostsamedomain`` options.

        :param event: the incoming event.
        :return: always None.
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data
        ret = None

        if self.errorState:
            return None

        # Ignore messages from myself
        if srcModuleName == "sfp_circllu":
            return None

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)

        if self.opts['api_key_login'] == "" or self.opts[
                'api_key_password'] == "":
            self.sf.error(
                "You enabled sfp_circllu but did not set an credentials!",
                False)
            self.errorState = True
            return None

        # Don't look up stuff twice
        if eventData in self.results:
            self.sf.debug("Skipping " + eventData + " as already mapped.")
            return None
        self.results[eventData] = True

        if eventName in ['IP_ADDRESS', 'NETBLOCK_OWNER']:
            # CIRCL.LU limit the maximum subnet size to 23
            # http://circl.lu/services/passive-ssl/
            querySsl = True
            if "/" in eventData:
                _, mask = eventData.split("/")
                if int(mask) < 23:
                    self.sf.debug(
                        "Network size bigger than permitted by CIRCL.LU.")
                    querySsl = False

            if querySsl:
                ret = self.query(eventData, "PSSL")
                if not ret:
                    self.sf.info("No CIRCL.LU passive SSL data found for " +
                                 eventData)

            if ret:
                # Compiled once; group 1 is the quoted subject containing a CN.
                subjectRx = re.compile(
                    r""".*["'](.+CN=([a-zA-Z0-9\-\*\.])+)["'].*""",
                    re.IGNORECASE)
                try:
                    # Generate an event for the IP first, and then link the cert
                    # to that event.
                    j = json.loads(ret)
                    for ip in j:
                        ipe = event
                        if ip != eventData:
                            ipe = SpiderFootEvent("IP_ADDRESS", ip,
                                                  self.__name__, event)
                            self.notifyListeners(ipe)
                        for crt in j[ip]['subjects']:
                            r = subjectRx.findall(str(j[ip]['subjects'][crt]))
                            if r:
                                e = SpiderFootEvent("SSL_CERTIFICATE_ISSUED",
                                                    r[0][0], self.__name__,
                                                    ipe)
                                self.notifyListeners(e)
                # Exception, not BaseException: a malformed response must not
                # also swallow KeyboardInterrupt/SystemExit.
                except Exception as e:
                    self.sf.error(
                        "Invalid response returned from CIRCL.LU: " + str(e),
                        False)

        if eventName in ['IP_ADDRESS', 'INTERNET_NAME', 'DOMAIN_NAME']:
            ret = self.query(eventData, "PDNS")
            if not ret:
                self.sf.info("No CIRCL.LU passive DNS data found for " +
                             eventData)
                return None

            # The cut-off is the same for every record, so compute it once.
            ageLimitDays = self.opts['age_limit_days']
            ageLimitTs = int(time.time()) - (86400 * ageLimitDays)

            # CIRCL.LU doesn't return valid JSON - it's one JSON record per line
            for line in ret.split("\n"):
                if len(line) < 2:
                    continue
                try:
                    rec = json.loads(line)
                except Exception as e:
                    self.sf.error(
                        "Invalid response returned from CIRCL.LU: " + str(e),
                        False)
                    continue

                if ageLimitDays > 0 and rec['time_last'] < ageLimitTs:
                    self.sf.debug("Record found but too old, skipping.")
                    continue

                cohosts = list()
                if eventName == "IP_ADDRESS":
                    # Record could be pointing to our IP, or from our IP
                    if rec['rrtype'] == "A" and rec['rdata'] == eventData:
                        if not self.getTarget().matches(rec['rrname']):
                            # We found a co-host
                            cohosts.append(rec['rrname'])

                if eventName in ["INTERNET_NAME", "DOMAIN_NAME"]:
                    # Record could be an A/CNAME of this entity, or something pointing to it
                    if rec['rdata'] == eventData:
                        if not self.getTarget().matches(rec['rrname']):
                            # We found a co-host
                            cohosts.append(rec['rrname'])

                for co in cohosts:
                    if eventName == "IP_ADDRESS" and (
                            self.opts['verify']
                            and not self.validateIP(co, eventData)):
                        self.sf.debug("Host no longer resolves to our IP.")
                        continue

                    if not self.opts['cohostsamedomain']:
                        if self.getTarget().matches(co, includeParents=True):
                            self.sf.debug("Skipping " + co +
                                          " because it is on the same domain.")
                            continue

                    e = SpiderFootEvent("CO_HOSTED_SITE", co, self.__name__,
                                        event)
                    self.notifyListeners(e)
Exemple #30
0
    def handleEvent(self, event):
        """Derive candidate usernames from the event and look them up on sites.

        Usernames come from ``HUMAN_NAME`` (spaces removed, and spaces
        replaced by dots), ``DOMAIN_NAME`` (the domain keyword),
        ``EMAILADDR`` (the local part) and ``USERNAME`` events.  Each
        candidate is skipped when it is listed in the ``generic`` option or,
        depending on options, in the name or word dictionaries.  Every site
        returned by ``batchSites`` raises ``ACCOUNT_EXTERNAL_OWNED``;
        a derived username that differs from the event data is raised once
        as ``USERNAME``.

        On the first call the site list is pruned of distrusted sites:
        either those named in the "sfaccounts_state" cache, or those that
        report a hit for a random 10-character user.

        :param event: the incoming event.
        :return: always None.
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data
        users = list()

        if self.errorState:
            return None

        self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

        # Skip events coming from me unless they are USERNAME events
        if eventName != "USERNAME" and srcModuleName == "sfp_accounts":
            return None

        if eventData in self.results:
            return None
        self.results[eventData] = True

        # If being called for the first time, let's see how trusted the
        # sites are by attempting to fetch a garbage user.
        if not self.distrustedChecked:
            # Check if a state cache exists first, to not have to do this all the time
            content = self.sf.cacheGet("sfaccounts_state", 72)
            if content:
                delsites = [line for line in content.split("\n") if line != '']
                self.sites = [d for d in self.sites if d['name'] not in delsites]
            else:
                randpool = 'abcdefghijklmnopqrstuvwxyz1234567890'
                rng = random.SystemRandom()
                randuser = ''.join(rng.choice(randpool) for _ in range(10))
                res = self.batchSites(randuser)
                if len(res) > 0:
                    # A site claiming the garbage user exists cannot be trusted.
                    delsites = list()
                    for site in res:
                        sitename = site.split(" (Category:")[0]
                        self.sf.debug("Distrusting " + sitename)
                        delsites.append(sitename)
                    self.sites = [d for d in self.sites if d['name'] not in delsites]
                    self.sf.cachePut("sfaccounts_state", delsites)

            self.distrustedChecked = True

        if eventName == "HUMAN_NAME":
            lowered = eventData.lower()
            users.append(lowered.replace(" ", ""))
            users.append(lowered.replace(" ", "."))

        if eventName == "DOMAIN_NAME":
            kw = self.sf.domainKeyword(eventData, self.opts['_internettlds'])
            # Only queue a keyword that was actually produced.
            if kw:
                users.append(kw)

        if eventName == "EMAILADDR":
            name = eventData.split("@")[0].lower()
            users.append(name)

        if eventName == "USERNAME":
            users.append(eventData)

        generic = self.opts['generic']
        for user in users:
            # The previous test "generic is list()" compared identity with a
            # brand-new list and was therefore never true.
            if isinstance(generic, (list, tuple, set)) and user in generic:
                self.sf.debug(user + " is a generic account name, skipping.")
                continue

            if self.opts['ignorenamedict'] and user in self.commonNames:
                self.sf.debug(user + " is found in our name dictionary, skipping.")
                continue

            if self.opts['ignoreworddict'] and user in self.words:
                self.sf.debug(user + " is found in our word dictionary, skipping.")
                continue

            res = self.batchSites(user)
            for site in res:
                evt = SpiderFootEvent("ACCOUNT_EXTERNAL_OWNED", site,
                                      self.__name__, event)
                self.notifyListeners(evt)

            if user not in self.reportedUsers and eventData != user:
                evt = SpiderFootEvent("USERNAME", user, self.__name__, event)
                self.notifyListeners(evt)
                self.reportedUsers.append(user)
Exemple #31
0
    def handleEvent(self, event):
        """Extract likely human names from the event data.

        ``TARGET_WEB_CONTENT`` whose source URL contains ".js" or ".css" is
        ignored when the ``filterjscss`` option is set.  With the
        ``emailtoname`` option, an ``EMAILADDR`` whose local part contains a
        dot is turned directly into a capitalised name.  Otherwise pairs of
        capitalised words are scored and reported as ``HUMAN_NAME`` when
        the score exceeds the ``algolimit`` option.

        :param event: the incoming event.
        :return: None.
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

        def announce(humanName):
            # Raise HUMAN_NAME, carrying over the data source of the event.
            evt = SpiderFootEvent("HUMAN_NAME", humanName, self.__name__, event)
            evt.moduleDataSource = event.moduleDataSource or "Unknown"
            self.notifyListeners(evt)

        # Web content fetched from a Javascript or CSS URL can be ignored.
        if eventName == "TARGET_WEB_CONTENT":
            url = event.sourceEvent.data
            if self.opts['filterjscss'] and (".js" in url or ".css" in url):
                self.sf.debug("Ignoring web content from CSS/JS.")
                return None

        if eventName == "EMAILADDR" and self.opts['emailtoname']:
            mailbox = eventData.split("@")[0]
            if "." in mailbox:
                pieces = mailbox.split(".")
                if type(eventData) == unicode:
                    name = " ".join([unicode.capitalize(p) for p in pieces])
                else:
                    name = " ".join([str.capitalize(p) for p in pieces])
                    name = unicode(name, 'utf-8', errors='replace')
                announce(name)
                return None

        # Stage 1: collect word pairs that vaguely resemble "First Last".
        rx = re.compile("([A-Z][a-z�������������]+)\s+.?.?\s?([A-Z][�������������a-zA-Z\'\-]+)")
        for firstOrig, secondRaw in re.findall(rx, eventData):
            # "Firstname's Secondname" is not a name.
            first = firstOrig.lower()
            if "'" in first[-2:]:
                continue

            # Drop a possessive 's and any trailing apostrophes.
            secondOrig = secondRaw.replace("'s", "").rstrip("'")
            second = secondRaw.lower().replace("'s", "").rstrip("'")

            firstKnown = first in self.d
            secondKnown = second in self.d

            # Every candidate starts at 0 points; two words that are both
            # absent from the dictionary earn 75.
            p = 0
            if not firstKnown and not secondKnown:
                self.sf.debug("Both first and second names are not in the dictionary, so high chance of name: (" + first +":" + second +").")
                p += 75
            else:
                self.sf.debug(first + " was found or " + second + " was found in dictionary.")

            # A known popular first name earns 50 points.
            if first in self.n:
                p += 50

            # A two-character word costs 50 points.
            if len(first) == 2 or len(second) == 2:
                p -= 50

            # Only the first word in the dictionary costs 20 points;
            # only the second word in the dictionary costs 40.
            if firstKnown and not secondKnown:
                p -= 20
            elif secondKnown and not firstKnown:
                p -= 40

            name = firstOrig + " " + secondOrig

            self.sf.debug("Name of " + name + " has score: " + str(p))
            if p > self.opts['algolimit']:
                announce(name)
Exemple #32
0
    def handleEvent(self, event):
        """Extract likely human names from the event data.

        With the ``emailtoname`` option, an ``EMAILADDR`` whose local part
        contains a dot is turned directly into a capitalised name.
        Otherwise pairs of capitalised words found in the data are scored
        and reported as ``HUMAN_NAME`` when the score exceeds the
        ``algotune`` option.

        :param event: the incoming event.
        :return: None.
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

        def announce(humanName):
            # Raise HUMAN_NAME, carrying over the data source of the event.
            evt = SpiderFootEvent("HUMAN_NAME", humanName, self.__name__, event)
            evt.moduleDataSource = event.moduleDataSource or "Unknown"
            self.notifyListeners(evt)

        if eventName == "EMAILADDR" and self.opts['emailtoname']:
            mailbox = eventData.split("@")[0]
            if "." in mailbox:
                pieces = mailbox.split(".")
                if type(eventData) == unicode:
                    name = " ".join([unicode.capitalize(p) for p in pieces])
                else:
                    name = " ".join([str.capitalize(p) for p in pieces])
                    name = unicode(name, 'utf-8', errors='replace')
                announce(name)
                return None

        # Stage 1: collect word pairs that vaguely resemble "First Last".
        rx = re.compile("([A-Z][a-z�������������]+)\s+.?.?\s?([A-Z][�������������a-zA-Z\'\-]+)")
        for firstOrig, secondRaw in re.findall(rx, eventData):
            # "Firstname's Secondname" is not a name.
            first = firstOrig.lower()
            if "'" in first[-2:]:
                continue

            # Drop a possessive 's and any trailing apostrophes.
            secondOrig = secondRaw.replace("'s", "").rstrip("'")
            second = secondRaw.lower().replace("'s", "").rstrip("'")

            firstKnown = first in self.d
            secondKnown = second in self.d

            # Every candidate starts at 0 points; two words that are both
            # absent from the dictionary earn 75.
            p = 75 if not (firstKnown or secondKnown) else 0

            # A known popular first name earns 50 points.
            if first in self.n:
                p += 50

            # A two-character word costs 50 points.
            if len(first) == 2 or len(second) == 2:
                p -= 50

            # Only the first word in the dictionary costs 20 points;
            # only the second word in the dictionary costs 40.
            if firstKnown and not secondKnown:
                p -= 20
            elif secondKnown and not firstKnown:
                p -= 40

            if p > self.opts['algotune']:
                announce(firstOrig + " " + secondOrig)
    def handleEvent(self, event):
        """Enrich IP addresses and host names using the networksdb queries.

        For ``IP_ADDRESS`` / ``IPV6_ADDRESS`` events the IP info, geolocation
        and reverse DNS queries are run in that order, raising
        ``RAW_RIR_DATA`` for each response plus ``NETBLOCK_MEMBER``,
        ``GEOINFO`` and co-host events where the response provides them.
        For ``INTERNET_NAME`` / ``DOMAIN_NAME`` events the forward DNS query
        is run and every valid address raises ``IP_ADDRESS`` or
        ``IPV6_ADDRESS``.

        Nothing is done when the module is in an error state, the data was
        already handled, or no API key is configured (which also sets the
        error state).

        :param event: the incoming event.
        :return: always None.
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        if self.errorState or eventData in self.results:
            return None

        if self.opts['api_key'] == '':
            self.sf.error(
                "You enabled sfp_networksdb but did not set an API key!",
                False)
            self.errorState = True
            return None

        self.results[eventData] = True

        self.sf.debug("Received event, %s, from %s" %
                      (eventName, srcModuleName))

        def report(eventType, payload):
            # Raise an event of the given type as a child of the incoming one.
            self.notifyListeners(
                SpiderFootEvent(eventType, payload, self.__name__, event))

        if eventName in ["IP_ADDRESS", "IPV6_ADDRESS"]:
            # 1. Ownership information, including the containing network.
            ipInfo = self.queryIpInfo(eventData)
            if ipInfo is None:
                self.sf.debug("No IP address information found for " +
                              eventData)
            else:
                report('RAW_RIR_DATA', str(ipInfo))

                network = ipInfo.get('network')
                if network:
                    cidr = network.get('cidr')
                    if cidr and cidr != 'N/A':
                        report('NETBLOCK_MEMBER', cidr)

            # 2. Geolocation, reported as "city, state, country" using
            #    whichever parts are present.
            geo = self.queryIpGeo(eventData)
            if geo is None:
                self.sf.debug("No IP geolocation information found for " +
                              eventData)
            else:
                report('RAW_RIR_DATA', str(geo))

                if geo.get('country'):
                    parts = [geo.get('city'), geo.get('state'),
                             geo.get('country')]
                    report('GEOINFO', ', '.join([p for p in parts if p]))

            # 3. Reverse DNS: other names seen on this address.
            rdns = self.queryReverseDns(eventData)
            cohosts = list()
            if rdns is None:
                self.sf.debug("No reverse DNS results for " + eventData)
            else:
                report('RAW_RIR_DATA', str(rdns))

                names = rdns.get('results')
                if names:
                    cohosts.extend(names)

            for co in set(cohosts):
                if self.checkForStop():
                    return None

                if co in self.results:
                    continue

                if self.opts['verify'] and not self.sf.validateIP(
                        co, eventData):
                    self.sf.debug("Host " + co + " no longer resolves to " +
                                  eventData)
                    continue

                # Names on the target's own domain are reported as the
                # target's host names instead of as co-hosted sites.
                if not self.opts['cohostsamedomain'] and \
                        self.getTarget().matches(co, includeParents=True):
                    report('INTERNET_NAME', co)
                    if self.sf.isDomain(co, self.opts['_internettlds']):
                        report('DOMAIN_NAME', co)
                    continue

                if self.cohostcount < self.opts['maxcohost']:
                    report('CO_HOSTED_SITE', co)
                    self.cohostcount += 1

        if eventName in ["INTERNET_NAME", "DOMAIN_NAME"]:
            fwd = self.queryForwardDns(eventData)
            addresses = None if fwd is None else fwd.get('results')

            if not addresses:
                self.sf.debug("No forward DNS results for " + eventData)
                return None

            report('RAW_RIR_DATA', str(addresses))

            for ip in addresses:
                if self.sf.validIP(ip):
                    report('IP_ADDRESS', ip)
                elif self.sf.validIP6(ip):
                    report('IPV6_ADDRESS', ip)
Exemple #34
0
    def handleEvent(self, event):
        """Extract phone numbers from content and look up a number's carrier.

        Event data is de-duplicated via its hash in ``self.results``.
        ``TARGET_WEB_CONTENT``, ``DOMAIN_WHOIS`` and ``NETBLOCK_WHOIS``
        events are scanned for phone numbers, each raised as a
        ``PHONE_NUMBER`` event in E.164 format.  ``PHONE_NUMBER`` events are
        parsed and, when a carrier name is found, raise ``PROVIDER_TELCO``.

        :param event: the incoming event.
        :return: always None.
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data
        sourceData = self.sf.hashstring(eventData)

        if sourceData in self.results:
            return None
        self.results[sourceData] = True

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)

        if eventName in [
                'TARGET_WEB_CONTENT', 'DOMAIN_WHOIS', 'NETBLOCK_WHOIS'
        ]:
            # Make potential phone numbers more friendly to parse
            content = eventData.replace('.', '-')

            for match in phonenumbers.PhoneNumberMatcher(content, region=None):
                n = phonenumbers.format_number(
                    match.number, phonenumbers.PhoneNumberFormat.E164)
                evt = SpiderFootEvent("PHONE_NUMBER", n, self.__name__, event)
                if event.moduleDataSource:
                    evt.moduleDataSource = event.moduleDataSource
                else:
                    evt.moduleDataSource = "Unknown"
                self.notifyListeners(evt)

        if eventName == 'PHONE_NUMBER':
            # Exception rather than BaseException: a parsing problem must not
            # also swallow KeyboardInterrupt/SystemExit.
            try:
                number = phonenumbers.parse(eventData)
            except Exception as e:
                self.sf.debug('Error parsing phone number: ' + str(e))
                return None

            try:
                number_carrier = carrier.name_for_number(number, 'en')
            except Exception as e:
                self.sf.debug('Error retrieving phone number carrier: ' +
                              str(e))
                return None

            if number_carrier:
                evt = SpiderFootEvent("PROVIDER_TELCO", number_carrier,
                                      self.__name__, event)
                self.notifyListeners(evt)
            else:
                self.sf.debug("No carrier information found for " + eventData)

            #try:
            #    location = geocoder.description_for_number(number, 'en')
            #except BaseException as e:
            #    self.sf.debug('Error retrieving phone number location: ' + str(e))
            #    return None

            #if location:
            #    evt = SpiderFootEvent("GEOINFO", location, self.__name__, event)
            #    self.notifyListeners(evt)
            #else:
            #    self.sf.debug("No location information found for " + eventData)

        return None
Exemple #35
0
    def handleEvent(self, event):
        """Check whether an external site mentions the scan target.

        The URL carried by the event is fetched and its content is searched
        for any of the target's names. If none is found, the event is a
        LINKED_URL_EXTERNAL and the 'checkbase' option is set, the base URL
        of the site is fetched and searched as well. On a hit, an
        AFFILIATE_INTERNET_NAME event and a child AFFILIATE_WEB_CONTENT
        event are sent to the listeners.

        Args:
            event: incoming event; its eventType, module, data and
                moduleDataSource attributes are read. When a match is found
                and moduleDataSource is empty it is set to "Unknown" on the
                incoming event itself.

        Returns:
            None in every case.
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        self.sf.debug(f"Received event, {eventName}, from {srcModuleName}")

        # The SIMILARDOMAIN and CO_HOSTED_SITE events supply domains,
        # not URLs. Assume HTTP.
        if eventName in ['SIMILARDOMAIN', 'CO_HOSTED_SITE']:
            eventData = 'http://' + eventData.lower()

        # We are only interested in external sites for the crossref
        if self.getTarget().matches(self.sf.urlFQDN(eventData)):
            self.sf.debug("Ignoring " + eventData + " as not external")
            return None

        # Remember every URL we test so it is fetched at most once.
        if eventData in self.fetched:
            self.sf.debug("Ignoring " + eventData + " as already tested")
            return None
        self.fetched[eventData] = True

        self.sf.debug("Testing for affiliation: " + eventData)
        res = self.sf.fetchUrl(eventData,
                               timeout=self.opts['_fetchtimeout'],
                               useragent=self.opts['_useragent'],
                               sizeLimit=10000000,
                               verify=False)

        if res['content'] is None:
            self.sf.debug("Ignoring " + eventData + " as no data returned")
            return None

        matched = False
        for name in self.getTarget().getNames():
            # Search for mentions of our host/domain in the external site's
            # data. The name is escaped so that it is matched literally: an
            # unescaped "." would match any character and other regex
            # metacharacters in a name could make the pattern invalid.
            pat = re.compile(
                r"([\.\'\/\"\ ]" + re.escape(name) + r"[\.\'\/\"\ ])",
                re.IGNORECASE)

            if pat.search(res['content']):
                matched = True
                url = eventData
                break

        if not matched:
            # If the name wasn't found in the affiliate, and checkbase is set,
            # fetch the base URL of the affiliate to check for a crossref.
            if eventName == "LINKED_URL_EXTERNAL" and self.opts['checkbase']:
                # Check the base url to see if there is an affiliation
                url = self.sf.urlBaseUrl(eventData)
                if url in self.fetched:
                    return None
                self.fetched[url] = True

                res = self.sf.fetchUrl(url,
                                       timeout=self.opts['_fetchtimeout'],
                                       useragent=self.opts['_useragent'],
                                       sizeLimit=10000000,
                                       verify=False)
                if res['content'] is not None:
                    for name in self.getTarget().getNames():
                        # NOTE(review): unlike the pattern above, the
                        # trailing character class here does not include
                        # "." - kept as-is; confirm whether intentional.
                        pat = re.compile(
                            r"([\.\'\/\"\ ]" + re.escape(name) + r"[\'\/\"\ ])",
                            re.IGNORECASE)

                        if pat.search(res['content']):
                            matched = True
                            # One matching name is enough.
                            break

        if matched:
            if not event.moduleDataSource:
                event.moduleDataSource = "Unknown"
            self.sf.info("Found affiliate: " + url)
            evt1 = SpiderFootEvent("AFFILIATE_INTERNET_NAME",
                                   self.sf.urlFQDN(url), self.__name__, event)
            evt1.moduleDataSource = event.moduleDataSource
            self.notifyListeners(evt1)
            # The fetched page content is reported as a child of the
            # affiliate name event.
            evt2 = SpiderFootEvent("AFFILIATE_WEB_CONTENT", res['content'],
                                   self.__name__, evt1)
            evt2.moduleDataSource = event.moduleDataSource
            self.notifyListeners(evt2)

        return None