Example #1
0
    def handleEvent(self, event):
        """Emit an IBAN_NUMBER event for each distinct IBAN found in the event data.

        Candidates are whatever ``self.sf.parseIBANNumbers`` returns for
        ``event.data``. Repeats within the same event are logged and skipped.

        Args:
            event: incoming event; ``eventType``, ``module``, ``data`` and
                ``moduleDataSource`` are read from it.

        Returns:
            None
        """
        kind = event.eventType
        origin = event.module
        payload = event.data

        # A module in the error state ignores every further event.
        if self.errorState:
            return None

        self.sf.debug(f"Received event, {kind}, from {origin}")

        reported = set()
        for iban in self.sf.parseIBANNumbers(payload):
            # Logged for every occurrence, including repeats.
            self.sf.info("Found IBAN number : " + iban)

            if iban in reported:
                self.sf.debug("Already found from this source")
                continue
            reported.add(iban)

            iban_event = SpiderFootEvent("IBAN_NUMBER", iban, self.__name__, event)
            # Carry over the parent's data source, falling back to "Unknown".
            iban_event.moduleDataSource = event.moduleDataSource or "Unknown"
            self.notifyListeners(iban_event)

        return None
Example #2
0
    def handleEvent(self, event):
        """Emit a PHONE_NUMBER event (E.164 format) for each number matched in the data.

        The payload hash is appended to ``self.results``; a payload whose hash
        is already recorded is ignored.

        Args:
            event: incoming event; ``eventType``, ``module``, ``data`` and
                ``moduleDataSource`` are read from it.

        Returns:
            None
        """
        kind = event.eventType
        origin = event.module
        payload = event.data
        digest = self.sf.hashstring(payload)

        # Guard clause: this exact payload has been handled before.
        if digest in self.results:
            return None
        self.results.append(digest)

        self.sf.debug("Received event, " + kind + ", from " + origin)

        # Dots are swapped for dashes to make candidate numbers easier to parse.
        candidate_text = payload.replace('.', '-')
        for hit in phonenumbers.PhoneNumberMatcher(candidate_text, region=None):
            e164 = phonenumbers.format_number(
                hit.number, phonenumbers.PhoneNumberFormat.E164)
            phone_event = SpiderFootEvent("PHONE_NUMBER", e164, self.__name__, event)
            # Carry over the parent's data source, falling back to "Unknown".
            phone_event.moduleDataSource = event.moduleDataSource or "Unknown"
            self.notifyListeners(phone_event)

        return None
Example #3
0
    def handleEvent(self, event):
        """Emit a CREDIT_CARD_NUMBER event for each distinct card number in the data.

        Candidates are whatever ``self.sf.parseCreditCards`` returns for
        ``event.data``. Repeats within the same event are logged and skipped.

        Args:
            event: incoming event; ``eventType``, ``module``, ``data`` and
                ``moduleDataSource`` are read from it.

        Returns:
            None
        """
        kind = event.eventType
        origin = event.module
        payload = event.data

        # A module in the error state ignores every further event.
        if self.errorState:
            return None

        self.sf.debug("Received event, " + kind + ", from " + origin)

        already_emitted = set()
        for card in self.sf.parseCreditCards(payload):
            # Logged for every occurrence, including repeats.
            self.sf.info("Found credit card number : " + card)

            if card in already_emitted:
                self.sf.debug("Already found from this source")
                continue
            already_emitted.add(card)

            card_event = SpiderFootEvent(
                "CREDIT_CARD_NUMBER", card, self.__name__, event)
            # Carry over the parent's data source, falling back to "Unknown".
            card_event.moduleDataSource = event.moduleDataSource or "Unknown"
            self.notifyListeners(card_event)

        return None
Example #4
0
    def handleEvent(self, event):
        """Find phone numbers in the event data and report each one in E.164 form.

        ``self.results`` is used as a list of payload hashes already handled;
        a payload seen before produces no events.

        Args:
            event: incoming event; ``eventType``, ``module``, ``data`` and
                ``moduleDataSource`` are read from it.

        Returns:
            None
        """
        event_type = event.eventType
        source_module = event.module
        raw_text = event.data
        fingerprint = self.sf.hashstring(raw_text)

        already_handled = fingerprint in self.results
        if already_handled:
            return None
        self.results.append(fingerprint)

        self.sf.debug(
            "Received event, " + event_type + ", from " + source_module)

        # Dots are swapped for dashes to make candidate numbers easier to parse.
        dashed_text = raw_text.replace('.', '-')
        for found in phonenumbers.PhoneNumberMatcher(dashed_text, region=None):
            formatted = phonenumbers.format_number(
                found.number,
                phonenumbers.PhoneNumberFormat.E164,
            )
            out_event = SpiderFootEvent(
                "PHONE_NUMBER", formatted, self.__name__, event)
            # Carry over the parent's data source, falling back to "Unknown".
            out_event.moduleDataSource = event.moduleDataSource or "Unknown"
            self.notifyListeners(out_event)

        return None
Example #5
0
    def handleEvent(self, event):
        """Extract phone numbers from content events and look up carriers.

        For ``TARGET_WEB_CONTENT``, ``DOMAIN_WHOIS`` and ``NETBLOCK_WHOIS``
        events, every number matched in the data is emitted as a
        ``PHONE_NUMBER`` event in E.164 format. For ``PHONE_NUMBER`` events the
        number is parsed and, when a carrier name is found, a
        ``PROVIDER_TELCO`` event is emitted. Any other event type is only
        recorded in ``self.results``.

        Args:
            event: incoming event; ``eventType``, ``module``, ``data`` and
                ``moduleDataSource`` are read from it.

        Returns:
            None
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data
        sourceData = self.sf.hashstring(eventData)

        # Each distinct payload (by hash) is processed only once.
        if sourceData in self.results:
            return None
        self.results[sourceData] = True

        self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

        if eventName in ['TARGET_WEB_CONTENT', 'DOMAIN_WHOIS', 'NETBLOCK_WHOIS']:
            # Make potential phone numbers more friendly to parse
            content = eventData.replace('.', '-')

            for match in phonenumbers.PhoneNumberMatcher(content, region=None):
                n = phonenumbers.format_number(
                    match.number, phonenumbers.PhoneNumberFormat.E164)
                evt = SpiderFootEvent("PHONE_NUMBER", n, self.__name__, event)
                evt.moduleDataSource = event.moduleDataSource or "Unknown"
                self.notifyListeners(evt)

        if eventName == 'PHONE_NUMBER':
            # Catch Exception rather than BaseException so that
            # KeyboardInterrupt / SystemExit still propagate to the caller.
            try:
                number = phonenumbers.parse(eventData)
            except Exception as e:
                self.sf.debug('Error parsing phone number: ' + str(e))
                return None

            try:
                number_carrier = carrier.name_for_number(number, 'en')
            except Exception as e:
                self.sf.debug('Error retrieving phone number carrier: ' + str(e))
                return None

            if number_carrier:
                evt = SpiderFootEvent("PROVIDER_TELCO", number_carrier, self.__name__, event)
                self.notifyListeners(evt)
            else:
                self.sf.debug("No carrier information found for " + eventData)

            # Location lookup via geocoder.description_for_number() is not
            # performed here.

        return None
Example #6
0
    def handleEvent(self, event):
        """Extract e-mail addresses on the target's domain from event data.

        ``event.data`` is scanned with a regular expression. Candidates that
        contain ``%`` or whose domain does not match the scan target are
        skipped; each remaining distinct address is emitted as an
        ``EMAILADDR`` event. Events whose type starts with ``EMAILADDR`` and
        non-text payloads are ignored.

        Args:
            event: incoming event; ``eventType``, ``module``, ``data`` and
                ``moduleDataSource`` are read from it.

        Returns:
            None
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        if eventName.startswith("EMAILADDR"):
            return None

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)

        # `unicode` exists only on Python 2; on Python 3 `str` is the sole
        # text type and referencing `unicode` would raise NameError.
        try:
            text_types = (str, unicode)
        except NameError:
            text_types = (str,)

        if not isinstance(eventData, text_types):
            self.sf.debug("Unhandled type to find e-mails: " +
                          str(type(eventData)))
            return None

        # Raw string: the pattern is unchanged, but the backslashes no longer
        # form invalid string escape sequences.
        pat = re.compile(
            r"([\%a-zA-Z\.0-9_\-]+@[a-zA-Z\.0-9\-]+\.[a-zA-Z\.0-9\-]+)")
        myres = set()
        for match in pat.findall(eventData):
            if len(match) < 4:
                self.sf.debug("Likely invalid address: " + match)
                continue

            # Handle messed up encodings
            if "%" in match:
                self.sf.debug("Skipped address: " + match)
                continue

            # Get the domain and strip potential ending .
            mailDom = match.lower().split('@')[1].strip('.')
            if not self.getTarget().matches(mailDom):
                self.sf.debug(
                    "Ignoring e-mail address on an external domain: " + match)
                continue

            self.sf.info("Found e-mail address: " + match)
            if isinstance(match, bytes):
                # Python 2 byte string: decode to text, replacing bad bytes.
                mail = match.strip(b'.').decode('utf-8', 'replace')
            else:
                mail = match.strip('.')

            if mail in myres:
                self.sf.debug("Already found from this source.")
                continue
            myres.add(mail)

            evt = SpiderFootEvent("EMAILADDR", mail, self.__name__, event)
            evt.moduleDataSource = event.moduleDataSource or "Unknown"
            self.notifyListeners(evt)

        return None
Example #7
0
    def handleEvent(self, event):
        """Report e-mail addresses found in event data that belong to the target.

        A regular expression pulls candidate addresses out of ``event.data``.
        A candidate is dropped when it contains ``%`` or when
        ``self.getTarget().matches`` rejects its domain; every other distinct
        address is emitted once as an ``EMAILADDR`` event. Events whose type
        starts with ``EMAILADDR`` and payloads that are not text are ignored.

        Args:
            event: incoming event; ``eventType``, ``module``, ``data`` and
                ``moduleDataSource`` are read from it.

        Returns:
            None
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        if eventName.startswith("EMAILADDR"):
            return None

        self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

        # The `unicode` builtin is Python 2 only; fall back to plain `str`
        # where it is undefined instead of failing with NameError.
        try:
            text_types = (str, unicode)
        except NameError:
            text_types = (str,)

        if not isinstance(eventData, text_types):
            self.sf.debug("Unhandled type to find e-mails: " + str(type(eventData)))
            return None

        # Raw string keeps the regex identical while avoiding invalid string
        # escape sequences such as "\%" and "\-".
        pat = re.compile(r"([\%a-zA-Z\.0-9_\-]+@[a-zA-Z\.0-9\-]+\.[a-zA-Z\.0-9\-]+)")
        matches = pat.findall(eventData)
        myres = set()
        for match in matches:
            if len(match) < 4:
                self.sf.debug("Likely invalid address: " + match)
                continue

            # Handle messed up encodings
            if "%" in match:
                self.sf.debug("Skipped address: " + match)
                continue

            # Get the domain and strip potential ending .
            mailDom = match.lower().split('@')[1].strip('.')
            if not self.getTarget().matches(mailDom):
                self.sf.debug("Ignoring e-mail address on an external domain: " + match)
                continue

            self.sf.info("Found e-mail address: " + match)
            if isinstance(match, bytes):
                # Python 2 byte string: decode to text, replacing bad bytes.
                mail = match.strip(b'.').decode('utf-8', 'replace')
            else:
                mail = match.strip('.')

            if mail in myres:
                self.sf.debug("Already found from this source.")
                continue
            myres.add(mail)

            evt = SpiderFootEvent("EMAILADDR", mail, self.__name__, event)
            evt.moduleDataSource = event.moduleDataSource or "Unknown"
            self.notifyListeners(evt)

        return None
Example #8
0
    def handleEvent(self, event):
        """Classify and emit every distinct e-mail address found in the event data.

        Addresses come from ``self.sf.parseEmails`` and are lower-cased. An
        address whose domain fails ``self.sf.validHost`` is skipped. The rest
        are emitted as ``EMAILADDR``, ``AFFILIATE_EMAILADDR`` (domain and
        address do not match the target, or the source event is itself an
        ``AFFILIATE_`` event) or ``EMAILADDR_GENERIC`` (non-affiliate address
        whose local part is listed in ``self.opts['_genericusers']``).

        Args:
            event: incoming event; ``eventType``, ``module``, ``data`` and
                ``moduleDataSource`` are read from it.

        Returns:
            None
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        self.sf.debug("Received event, %s, from %s" %
                      (eventName, srcModuleName))

        emails = self.sf.parseEmails(eventData)
        myres = set()
        for email in emails:
            evttype = "EMAILADDR"
            email = email.lower()

            # Get the domain and strip potential ending .
            mailDom = email.split('@')[1].strip('.')
            if not self.sf.validHost(mailDom, self.opts['_internettlds']):
                self.sf.debug("Skipping " + email + " as not a valid e-mail.")
                # Only this candidate is invalid: move on to the next one
                # instead of abandoning the remaining addresses.
                continue

            if not self.getTarget().matches(
                    mailDom, includeChildren=True, includeParents=True
            ) and not self.getTarget().matches(email):
                self.sf.debug("External domain, so possible affiliate e-mail")
                evttype = "AFFILIATE_EMAILADDR"

            if eventName.startswith("AFFILIATE_"):
                evttype = "AFFILIATE_EMAILADDR"

            if not evttype.startswith("AFFILIATE_") and email.split(
                    "@")[0] in self.opts['_genericusers'].split(","):
                evttype = "EMAILADDR_GENERIC"

            self.sf.info("Found e-mail address: " + email)
            mail = email.strip('.')

            if mail in myres:
                self.sf.debug("Already found from this source.")
                continue

            myres.add(mail)

            evt = SpiderFootEvent(evttype, mail, self.__name__, event)
            evt.moduleDataSource = event.moduleDataSource or "Unknown"
            self.notifyListeners(evt)

        return None
Example #9
0
    def handleEvent(self, event):
        """Emit a COUNTRY_NAME event for each country detected from the event data.

        The detector used depends on the event type. Affiliate, co-hosted and
        similar-domain event types are processed only when the corresponding
        ``self.opts`` flag is truthy. Each distinct payload (by hash) is
        handled once.

        Args:
            event: incoming event; ``eventType``, ``module``, ``data`` and
                ``moduleDataSource`` are read from it.

        Returns:
            None
        """
        kind = event.eventType
        origin = event.module
        payload = event.data

        data_source = event.moduleDataSource or "Unknown"

        self.sf.debug(f"Received event, {kind}, from {origin}")

        digest = self.sf.hashstring(payload)
        if digest in self.results:
            self.sf.debug(f"Skipping {payload}, already checked.")
            return None
        self.results[digest] = True

        # Event type -> option that must be enabled (None means always on).
        # These types yield a single country from a domain name.
        domain_gates = {
            "DOMAIN_NAME": None,
            "AFFILIATE_DOMAIN_NAME": "affiliate",
            "CO_HOSTED_SITE_DOMAIN": "cohosted",
            "SIMILARDOMAIN": "similardomain",
        }
        # These types may yield several countries from free-form data.
        data_gates = {
            "DOMAIN_WHOIS": None,
            "GEOINFO": None,
            "PHYSICAL_ADDRESS": None,
            "AFFILIATE_DOMAIN_WHOIS": "affiliate",
            "CO_HOSTED_SITE_DOMAIN_WHOIS": "cohosted",
        }

        found = []
        if kind == "PHONE_NUMBER":
            found.append(self.detectCountryFromPhone(payload))
        elif kind == "IBAN_NUMBER":
            found.append(self.detectCountryFromIBAN(payload))
        elif kind in domain_gates:
            gate = domain_gates[kind]
            if gate is None or self.opts[gate]:
                found.append(self.detectCountryFromDomainName(payload))
        elif kind in data_gates:
            gate = data_gates[kind]
            if gate is None or self.opts[gate]:
                found.extend(self.detectCountryFromData(payload))

        if not found:
            self.sf.debug(f"Found no country names associated with {kind}: {payload}")
            return None

        # Falsy entries (a detector that found nothing) are dropped.
        for country in filter(None, set(found)):
            self.sf.debug(f"Found country name: {country}")

            country_event = SpiderFootEvent("COUNTRY_NAME", country, self.__name__, event)
            country_event.moduleDataSource = data_source
            self.notifyListeners(country_event)

        return None
Example #10
0
    def handleEvent(self, event):
        """Emit an event for every distinct e-mail address found in the event data.

        Addresses come from ``self.sf.parseEmails``. An address is reported as
        ``AFFILIATE_EMAILADDR`` when neither its domain nor the address itself
        matches the scan target, or when the source event type starts with
        ``AFFILIATE_``; otherwise it is reported as ``EMAILADDR``.

        Args:
            event: incoming event; ``eventType``, ``module``, ``data`` and
                ``moduleDataSource`` are read from it.

        Returns:
            None
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)

        emails = self.sf.parseEmails(eventData)
        myres = set()
        for email in emails:
            evttype = "EMAILADDR"

            # Get the domain and strip potential ending .
            mailDom = email.lower().split('@')[1].strip('.')
            # The second check must test the address itself; the previous
            # code referenced an undefined name here and raised NameError.
            if not self.getTarget().matches(
                    mailDom, includeChildren=True, includeParents=True
            ) and not self.getTarget().matches(email):
                self.sf.debug("External domain, so possible affiliate e-mail")
                evttype = "AFFILIATE_EMAILADDR"

            if eventName.startswith("AFFILIATE_"):
                evttype = "AFFILIATE_EMAILADDR"

            self.sf.info("Found e-mail address: " + email)
            if isinstance(email, bytes):
                # Python 2 byte string: decode to text, replacing bad bytes.
                # (Avoids the `unicode` builtin, which Python 3 lacks.)
                mail = email.strip(b'.').decode('utf-8', 'replace')
            else:
                mail = email.strip('.')

            if mail in myres:
                self.sf.debug("Already found from this source.")
                continue

            myres.add(mail)

            evt = SpiderFootEvent(evttype, mail, self.__name__, event)
            evt.moduleDataSource = event.moduleDataSource or "Unknown"
            self.notifyListeners(evt)

        return None
Example #11
0
    def handleEvent(self, event):
        """Emit a HASH event for every (algorithm, value) pair found in the data.

        Pairs are whatever ``self.sf.parseHashes`` returns for ``event.data``;
        each is reported as the string ``"[<algorithm>] <value>"``.

        Args:
            event: incoming event; ``eventType``, ``module``, ``data`` and
                ``moduleDataSource`` are read from it.

        Returns:
            None
        """
        kind = event.eventType
        origin = event.module
        payload = event.data

        self.sf.debug(f"Received event, {kind}, from {origin}")

        for algo, digest in self.sf.parseHashes(payload):
            label = "[" + algo + "] " + digest
            hash_event = SpiderFootEvent("HASH", label, self.__name__, event)
            # Carry over the parent's data source, falling back to "Unknown".
            hash_event.moduleDataSource = event.moduleDataSource or "Unknown"
            self.notifyListeners(hash_event)

        return None
    def handleEvent(self, event):
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data
        sourceData = self.sf.hashstring(eventData)

        if sourceData in self.results:
            self.sf.debug(f"Skipping {eventData}, already checked.")
            return None

        self.results[sourceData] = True

        self.sf.debug(f"Received event, {eventName}, from {srcModuleName}")

        if event.moduleDataSource:
            datasource = event.moduleDataSource
        else:
            datasource = "Unknown"

        if eventName == 'TARGET_WEB_CONTENT':
            # Google Analytics
            matches = re.findall(r"\bua\-\d{4,10}\-\d{1,4}\b", eventData,
                                 re.IGNORECASE)
            for m in matches:
                if m.lower().startswith('ua-000000-'):
                    continue
                if m.lower().startswith('ua-123456-'):
                    continue
                if m.lower().startswith('ua-12345678'):
                    continue

                self.sf.debug("Google Analytics match: " + m)
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Google Analytics: " + m, self.__name__,
                                      event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # Google AdSense
            matches = re.findall(r"\b(pub-\d{10,20})\b", eventData,
                                 re.IGNORECASE)
            for m in matches:
                if m.lower().startswith('pub-12345678'):
                    continue

                self.sf.debug("Google AdSense match: " + m)
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Google AdSense: " + m, self.__name__,
                                      event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # Google Website Verification
            # https://developers.google.com/site-verification/v1/getting_started
            matches = re.findall(
                r'<meta name="google-site-verification" content="([a-z0-9\-\+_=]{43,44})"',
                eventData, re.IGNORECASE)
            for m in matches:
                self.sf.debug("Google Site Verification match: " + m)
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Google Site Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            matches = re.findall(
                r'<meta name="verify-v1" content="([a-z0-9\-\+_=]{43,44})"',
                eventData, re.IGNORECASE)
            for m in matches:
                self.sf.debug("Google Site Verification match: " + m)
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Google Site Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # Quantcast
            if '_qevents.push' in eventData:
                matches = re.findall(r"\bqacct:\"(p-[a-z0-9]+)\"", eventData,
                                     re.IGNORECASE)
                for m in matches:
                    self.sf.debug("Quantcast match: " + m)
                    evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                          "Quantcast: " + m, self.__name__,
                                          event)
                    evt.moduleDataSource = datasource
                    self.notifyListeners(evt)

            # Ahrefs Site Verification
            matches = re.findall(
                r'<meta name="ahrefs-site-verification" content="([a-f0-9]{64})"',
                eventData, re.IGNORECASE)
            for m in matches:
                self.sf.debug("Ahrefs Site Verification match: " + m)
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Ahrefs Site Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

        if eventName == 'DNS_TEXT':
            # Google Website Verification
            # https://developers.google.com/site-verification/v1/getting_started
            matches = re.findall(
                r'google-site-verification=([a-z0-9\-\+_=]{43,44})$',
                eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Google Site Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # LogMeIn Domain Verification
            # https://support.logmeininc.com/openvoice/help/adding-a-txt-record-to-a-dns-server-ov710011
            matches = re.findall(
                r'logmein-domain-confirmation ([A-Z0-9]{24})$',
                eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "LogMeIn Domain Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            matches = re.findall(
                r'logmein-verification-code=([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})$',
                eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "LogMeIn Domain Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # DocuSign Domain Verification
            # https://support.docusign.com/en/guides/org-admin-guide-domains
            matches = re.findall(
                r'docusign=([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})$',
                eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "DocuSign Domain Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # GlobalSign Site Verification
            # https://support.globalsign.com/customer/en/portal/articles/2167245-performing-domain-verification---dns-txt-record
            matches = re.findall(
                r'globalsign-domain-verification=([a-z0-9\-\+_=]{42,44})$',
                eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "GlobalSign Site Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # Atlassian Domain Verification
            # https://confluence.atlassian.com/cloud/verify-a-domain-for-your-organization-873871234.html
            matches = re.findall(
                r'atlassian-domain-verification=([a-z0-9\-\+\/_=]{64})$',
                eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Atlassian Domain Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # Adobe IDP Site Verification
            # https://helpx.adobe.com/au/enterprise/using/verify-domain-ownership.html
            matches = re.findall(
                r'adobe-idp-site-verification=([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})$',
                eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Adobe IDP Site Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            matches = re.findall(
                r'adobe-idp-site-verification=([a-f0-9]{64})$',
                eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Adobe IDP Site Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # Adobe Domain Verification
            # https://helpx.adobe.com/sign/help/domain_claiming.html
            matches = re.findall(r'adobe-sign-verification=([a-f0-9]{32})$',
                                 eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Adobe Domain Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # Stripe Domain Verification
            # https://stripe.com/docs/apple-pay/web#going-live
            matches = re.findall(r'stripe-verification=([a-f0-9]{64})$',
                                 eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Stripe Domain Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # TeamViewer SSO Verification
            # https://community.teamviewer.com/t5/Knowledge-Base/Single-Sign-On-SSO/ta-p/30784
            matches = re.findall(
                r'teamviewer-sso-verification=([a-f0-9]{32})$',
                eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "TeamViewer SSO Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # Aliyun Site Verification
            matches = re.findall(
                r'aliyun-site-verification=([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})$',
                eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Aliyun Site Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # Facebook Domain Verification
            # https://developers.facebook.com/docs/sharing/domain-verification/
            matches = re.findall(
                r'facebook-domain-verification=([a-z0-9]{30})$',
                eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Facebook Domain Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # Citrix Domain Verification
            matches = re.findall(
                r'citrix-verification-code=([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})$',
                eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Citrix Domain Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # Dropbox Domain Verification
            # https://help.dropbox.com/teams-admins/admin/domain-insights-account-capture#verify
            matches = re.findall(
                r'dropbox-domain-verification=([a-z0-9]{12})$',
                eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Dropbox Domain Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # Detectify Domain Verification
            # https://support.detectify.com/customer/en/portal/articles/2836806-verification-with-dns-txt-
            matches = re.findall(r'detectify-verification=([a-f0-9]{32})$',
                                 eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Detectify Domain Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # Drift Domain Verification
            matches = re.findall(r'drift-verification=([a-f0-9]{64})$',
                                 eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Drift Domain Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # Ahrefs Site Verification
            # https://help.ahrefs.com/en/articles/1431155-how-do-i-finish-crawling-my-website-faster-in-site-audit
            matches = re.findall(r'ahrefs-site-verification_([a-f0-9]{64})$',
                                 eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Ahrefs Site Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # Statuspage.io Domain Verification
            # https://help.statuspage.io/help/domain-ownership
            matches = re.findall(
                r'status-page-domain-verification=([a-z0-9]{12})$',
                eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Statuspage Domain Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # Zoom.us Domain Verification
            # https://support.zoom.us/hc/en-us/articles/203395207-What-is-Managed-Domain-
            matches = re.findall(r'ZOOM_verify_([a-z0-9\-\+\/_=]{22})$',
                                 eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Zoom.us Domain Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # Mail.ru Domain Verification
            matches = re.findall(r'mailru-verification: ([a-z0-9]{16})$',
                                 eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Mail.ru Domain Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # Yandex Domain Verification
            matches = re.findall(r'yandex-verification: ([a-z0-9]{16})$',
                                 eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Yandex Domain Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # Brave Ledger Verification
            # https://support.brave.com/hc/en-us/articles/360021408352-How-do-I-verify-my-channel-
            matches = re.findall(r'brave-ledger-verification=([a-z0-9]+)$',
                                 eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Brave Ledger Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # have-i-been-pwned Verification
            matches = re.findall(
                r'have-i-been-pwned-verification=([a-f0-9]+)$',
                eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "have-i-been-pwned Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

            # Cisco Live Domain Verification
            # https://www.ciscolive.com/c/dam/r/ciscolive/us/docs/2016/pdf/TECCOL-2982.pdf
            matches = re.findall(r'cisco-ci-domain-verification=([a-f0-9]+)$',
                                 eventData.strip(), re.IGNORECASE)
            for m in matches:
                evt = SpiderFootEvent("WEB_ANALYTICS_ID",
                                      "Cisco Live Domain Verification: " + m,
                                      self.__name__, event)
                evt.moduleDataSource = datasource
                self.notifyListeners(evt)

        return None
Example #13
0
    def handleEvent(self, event):
        """Look for human names in the event data and report them.

        Two strategies are used:

        * For EMAILADDR events, when the 'emailtoname' option is set, a
          dotted local part ("john.smith@...") is turned directly into a
          name ("John Smith") and reported; nothing else is scanned.
        * Otherwise the event data is scanned for pairs of capitalised
          words. Each pair is scored using the dictionary (self.d) and the
          list of popular first names (self.n); pairs scoring above the
          'algolimit' option are reported.

        Every name is reported as a HUMAN_NAME event whose moduleDataSource
        is copied from the incoming event ("Unknown" if it has none).

        Args:
            event: the incoming event. Its eventType, module, data,
                actualSource and moduleDataSource attributes are read.

        Returns:
            None
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)

        # If the source event is web content, check if the source URL was
        # javascript or CSS, in which case optionally ignore it.
        if eventName == "TARGET_WEB_CONTENT":
            url = event.actualSource
            # Guard against a missing source URL: the substring tests below
            # would raise TypeError on None.
            if url is not None:
                if self.opts['filterjscss'] and (".js" in url or ".css" in url):
                    self.sf.debug("Ignoring web content from CSS/JS.")
                    return None

        if eventName == "EMAILADDR" and self.opts['emailtoname']:
            localPart = eventData.split("@")[0]
            if "." in localPart:
                if isinstance(eventData, unicode):
                    name = " ".join(
                        map(unicode.capitalize, localPart.split(".")))
                else:
                    name = " ".join(map(str.capitalize, localPart.split(".")))
                    name = unicode(name, 'utf-8', errors='replace')
                # Notify other modules of what you've found
                evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
                evt.moduleDataSource = event.moduleDataSource or "Unknown"
                self.notifyListeners(evt)
                return None

        # Stage 1: Find things that look (very vaguely) like names
        rx = re.compile(
            "([A-Z][a-z�������������]+)\s+.?.?\s?([A-Z][�������������a-zA-Z\'\-]+)"
        )
        for r in rx.findall(eventData):
            # Start off each match as 0 points.
            p = 0

            # Shouldn't encounter "Firstname's Secondname"
            first = r[0].lower()
            if "'" in first[-2:]:
                continue

            # Strip off trailing ' or 's
            secondOrig = r[1].replace("'s", "").rstrip("'")
            second = r[1].lower().replace("'s", "").rstrip("'")

            firstInDict = first in self.d
            secondInDict = second in self.d

            if not firstInDict and not secondInDict:
                # Neither word is a dictionary word: add 75 points.
                self.sf.debug(
                    "Both first and second names are not in the dictionary, so high chance of name: ("
                    + first + ":" + second + ").")
                p += 75
            else:
                self.sf.debug(first + " was found or " + second +
                              " was found in dictionary.")
                # Only the first word is a dictionary word: subtract 20.
                if firstInDict and not secondInDict:
                    p -= 20
                # Only the second word is a dictionary word: subtract 40.
                if secondInDict and not firstInDict:
                    p -= 40

            # If the first word is a known popular first name, award 50 points.
            if first in self.n:
                p += 50

            # If either word is 2 characters, subtract 50 points.
            if len(first) == 2 or len(second) == 2:
                p -= 50

            name = r[0] + " " + secondOrig

            self.sf.debug("Name of " + name + " has score: " + str(p))
            if p > self.opts['algolimit']:
                # Notify other modules of what you've found
                evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
                evt.moduleDataSource = event.moduleDataSource or "Unknown"
                self.notifyListeners(evt)
Example #14
0
    def handleEvent(self, event):
        """Derive country names from the event data and report them.

        The detector used depends on the event type: phone numbers, domain
        names (by TLD), IBAN numbers, or free-form data (whois, geo info,
        physical addresses). Affiliate, co-hosted and similar-domain events
        are only processed when the matching option is enabled. Event data
        that was already processed (tracked by hash in self.results) is
        skipped.

        Each distinct, non-empty country name is reported as a COUNTRY_NAME
        event whose moduleDataSource is copied from the incoming event
        ("Unknown" if it has none).

        Args:
            event: the incoming event. Its eventType, module, data and
                moduleDataSource attributes are read.

        Returns:
            None
        """
        kind = event.eventType
        origin = event.module
        payload = event.data

        self.sf.debug("Received event, %s, from %s" % (kind, origin))

        # Never parse the same incoming data twice.
        digest = self.sf.hashstring(payload)
        if digest in self.results:
            self.sf.debug("Already found from this source")
            return None
        self.results[digest] = True

        # Each option is consulted only when the event type it belongs to
        # is the one being handled.
        wantsTld = (
            kind == "DOMAIN_NAME"
            or (kind == "AFFILIATE_DOMAIN_NAME" and self.opts["affiliate"])
            or (kind == "CO_HOSTED_SITE_DOMAIN" and self.opts["cohosted"])
            or (kind == "SIMILARDOMAIN" and self.opts["similardomain"])
        )
        wantsFreeText = (
            kind in ["DOMAIN_WHOIS", "GEOINFO", "PHYSICAL_ADDRESS"]
            or (kind == "AFFILIATE_DOMAIN_WHOIS" and self.opts["affiliate"])
            or (kind == "CO_HOSTED_SITE_DOMAIN_WHOIS"
                and self.opts["cohosted"])
        )

        candidates = []
        if kind == "PHONE_NUMBER":
            candidates.append(self.detectCountryFromPhone(payload))
        elif wantsTld:
            candidates.append(self.detectCountryFromTLD(payload))
        elif kind == "IBAN_NUMBER":
            candidates.append(self.detectCountryFromIBAN(payload))
        elif wantsFreeText:
            detected = self.detectCountryFromData(payload)
            candidates.extend([None] if detected is None else detected)

        # Nothing applicable to this event type.
        if not candidates:
            return None

        # A set drops duplicate names before reporting.
        for country in set(candidates):
            if country is None or country == '':
                continue
            self.sf.debug("Found country name: " + country)

            found = SpiderFootEvent("COUNTRY_NAME", country, self.__name__,
                                    event)
            found.moduleDataSource = event.moduleDataSource or "Unknown"
            self.notifyListeners(found)
        return None
Example #15
0
    def handleEvent(self, event):
        """Report HUMAN_NAME events for names found in the event data.

        EMAILADDR events with a dotted local part are converted straight
        into a name when the 'emailtoname' option is set. Any other data is
        scanned for pairs of capitalised words, which are scored using the
        dictionary (self.d) and the popular first names list (self.n);
        pairs scoring above the 'algolimit' option are reported.

        Args:
            event: the incoming event. Its eventType, module, data,
                sourceEvent and moduleDataSource attributes are read.

        Returns:
            None
        """
        kind = event.eventType
        origin = event.module
        text = event.data

        self.sf.debug("Received event, " + kind + ", from " + origin)

        # Web content that came from a javascript or CSS URL can optionally
        # be skipped altogether.
        if kind == "TARGET_WEB_CONTENT":
            url = event.sourceEvent.data
            if self.opts['filterjscss'] and (".js" in url or ".css" in url):
                self.sf.debug("Ignoring web content from CSS/JS.")
                return None

        if kind == "EMAILADDR" and self.opts['emailtoname']:
            mailbox = text.split("@")[0]
            if "." in mailbox:
                pieces = mailbox.split(".")
                if type(text) == unicode:
                    name = " ".join(unicode.capitalize(x) for x in pieces)
                else:
                    name = " ".join(str.capitalize(x) for x in pieces)
                    name = unicode(name, 'utf-8', errors='replace')
                # Tell the other modules about the name.
                found = SpiderFootEvent("HUMAN_NAME", name, self.__name__,
                                        event)
                found.moduleDataSource = event.moduleDataSource or "Unknown"
                self.notifyListeners(found)
                return None

        # Stage 1: anything that looks (very vaguely) like a name.
        rx = re.compile("([A-Z][a-z�������������]+)\s+.?.?\s?([A-Z][�������������a-zA-Z\'\-]+)")
        for given, family in rx.findall(text):
            # "Firstname's Secondname" is not a name.
            first = given.lower()
            if "'" in first[-2:]:
                continue

            # Drop a trailing ' or 's from the second word.
            secondOrig = family.replace("'s", "").rstrip("'")
            second = family.lower().replace("'s", "").rstrip("'")

            firstKnown = first in self.d
            secondKnown = second in self.d

            if firstKnown or secondKnown:
                self.sf.debug(first + " was found or " + second + " was found in dictionary.")
                score = 0
                # Only the first word is a dictionary word: minus 20.
                if firstKnown and not secondKnown:
                    score -= 20
                # Only the second word is a dictionary word: minus 40.
                if secondKnown and not firstKnown:
                    score -= 40
            else:
                # Neither word is a dictionary word: start at 75.
                self.sf.debug("Both first and second names are not in the dictionary, so high chance of name: (" + first + ":" + second + ").")
                score = 75

            # A popular first name is worth 50 points.
            if first in self.n:
                score += 50

            # A two-character word costs 50 points.
            if len(first) == 2 or len(second) == 2:
                score -= 50

            name = given + " " + secondOrig

            self.sf.debug("Name of " + name + " has score: " + str(score))
            if score > self.opts['algolimit']:
                # Tell the other modules about the name.
                found = SpiderFootEvent("HUMAN_NAME", name, self.__name__,
                                        event)
                found.moduleDataSource = event.moduleDataSource or "Unknown"
                self.notifyListeners(found)
Example #16
0
    def handleEvent(self, event):
        """Look for human names in the event data and report them.

        Two strategies are used:

        * For EMAILADDR events, when the 'emailtoname' option is set, a
          dotted local part ("john.smith@...") is turned directly into a
          name ("John Smith"). Local parts containing digits are rejected.
          Nothing else is scanned for such addresses.
        * Otherwise the event data is scanned for pairs of capitalised
          words. Each pair is scored using the dictionary (self.d) and the
          list of popular first names (self.n); pairs scoring above the
          'algolimit' option are reported.

        RAW_RIR_DATA is only scanned when it comes from one of a fixed list
        of modules. Every name is reported as a HUMAN_NAME event whose
        moduleDataSource is copied from the incoming event ("Unknown" if it
        has none).

        Args:
            event: the incoming event. Its eventType, module, data,
                actualSource and moduleDataSource attributes are read.

        Returns:
            None
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        self.sf.debug(f"Received event, {eventName}, from {srcModuleName}")

        # If the source event is web content, check if the source URL was javascript
        # or CSS, in which case optionally ignore it.
        if eventName == "TARGET_WEB_CONTENT":
            url = event.actualSource
            if url is not None:
                if self.opts['filterjscss'] and (".js" in url
                                                 or ".css" in url):
                    self.sf.debug("Ignoring web content from CSS/JS.")
                    return None

        if eventName == "EMAILADDR" and self.opts['emailtoname']:
            localPart = eventData.split("@")[0]
            if "." in localPart:
                name = " ".join(map(str.capitalize, localPart.split(".")))

                # Names don't have numbers. re.search is required here: the
                # previous re.match("[0-9]*", ...) also matched the empty
                # string, so every name was rejected.
                if re.search("[0-9]", name):
                    return None

                # Notify other modules of what you've found
                evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
                evt.moduleDataSource = event.moduleDataSource or "Unknown"
                self.notifyListeners(evt)
                return None

        # For RAW_RIR_DATA, there are only specific modules we
        # expect to see RELEVANT names within.
        if eventName == "RAW_RIR_DATA":
            if srcModuleName not in [
                    "sfp_arin", "sfp_builtwith", "sfp_clearbit",
                    "sfp_fullcontact", "sfp_github", "sfp_hunter",
                    "sfp_opencorporates", "sfp_slideshare", "sfp_twitter",
                    "sfp_venmo", "sfp_instagram"
            ]:
                self.sf.debug("Ignoring RAW_RIR_DATA from untrusted module.")
                return None

        # Stage 1: Find things that look (very vaguely) like names
        rx = re.compile(
            r"([A-Z][a-z�������������]+)\s+.?.?\s?([A-Z][�������������a-zA-Z\'\-]+)"
        )
        for r in rx.findall(eventData):
            # Start off each match as 0 points.
            p = 0

            # Shouldn't encounter "Firstname's Secondname"
            first = r[0].lower()
            if "'" in first[-2:]:
                continue

            # Strip off trailing ' or 's
            secondOrig = r[1].replace("'s", "").rstrip("'")
            second = r[1].lower().replace("'s", "").rstrip("'")

            firstInDict = first in self.d
            secondInDict = second in self.d

            if not firstInDict and not secondInDict:
                # Neither word is a dictionary word: add 75 points.
                self.sf.debug(
                    f"Both first and second names are not in the dictionary, so high chance of name: ({first}:{second})."
                )
                p += 75
            else:
                self.sf.debug(first + " was found or " + second +
                              " was found in dictionary.")
                # Only the first word is a dictionary word: subtract 20.
                if firstInDict and not secondInDict:
                    p -= 20
                # Only the second word is a dictionary word: subtract 40.
                if secondInDict and not firstInDict:
                    p -= 40

            # If the first word is a known popular first name, award 50 points.
            if first in self.n:
                p += 50

            # If either word is 2 characters, subtract 50 points.
            if len(first) == 2 or len(second) == 2:
                p -= 50

            name = r[0] + " " + secondOrig

            self.sf.debug("Name of " + name + " has score: " + str(p))
            if p > self.opts['algolimit']:
                # Notify other modules of what you've found
                evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
                evt.moduleDataSource = event.moduleDataSource or "Unknown"
                self.notifyListeners(evt)
Example #17
0
    def handleEvent(self, event):
        """Report HUMAN_NAME events for names found in the event data.

        EMAILADDR events with a dotted local part are converted straight
        into a name when the 'emailtoname' option is set. Any other data is
        scanned for pairs of capitalised words, which are scored using the
        dictionary (self.d) and the popular first names list (self.n);
        pairs scoring above the 'algotune' option are reported.

        Args:
            event: the incoming event. Its eventType, module, data and
                moduleDataSource attributes are read.

        Returns:
            None
        """
        kind = event.eventType
        sender = event.module
        body = event.data

        self.sf.debug("Received event, " + kind + ", from " + sender)

        if kind == "EMAILADDR" and self.opts['emailtoname']:
            mailbox = body.split("@")[0]
            if "." in mailbox:
                words = mailbox.split(".")
                if type(body) == unicode:
                    name = " ".join([unicode.capitalize(w) for w in words])
                else:
                    name = " ".join([str.capitalize(w) for w in words])
                    name = unicode(name, 'utf-8', errors='replace')
                # Pass the derived name on to the listeners.
                out = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
                out.moduleDataSource = event.moduleDataSource or "Unknown"
                self.notifyListeners(out)
                return None

        # Stage 1: anything that looks (very vaguely) like a name.
        rx = re.compile("([A-Z][a-z�������������]+)\s+.?.?\s?([A-Z][�������������a-zA-Z\'\-]+)")
        for pair in rx.findall(body):
            rawFirst, rawSecond = pair[0], pair[1]

            # "Firstname's Secondname" is not a name.
            first = rawFirst.lower()
            if "'" in first[-2:]:
                continue

            # Drop a trailing ' or 's from the second word.
            secondOrig = rawSecond.replace("'s", "").rstrip("'")
            second = rawSecond.lower().replace("'s", "").rstrip("'")

            knownFirst = first in self.d
            knownSecond = second in self.d

            points = 0
            if not knownFirst and not knownSecond:
                # Neither word is a dictionary word: plus 75.
                points += 75
            elif knownFirst and not knownSecond:
                # Only the first word is a dictionary word: minus 20.
                points -= 20
            elif knownSecond and not knownFirst:
                # Only the second word is a dictionary word: minus 40.
                points -= 40

            # A popular first name is worth 50 points.
            if first in self.n:
                points += 50

            # A two-character word costs 50 points.
            if len(first) == 2 or len(second) == 2:
                points -= 50

            if points > self.opts['algotune']:
                # Pass the name on to the listeners.
                out = SpiderFootEvent("HUMAN_NAME", rawFirst + " " + secondOrig,
                                      self.__name__, event)
                out.moduleDataSource = event.moduleDataSource or "Unknown"
                self.notifyListeners(out)
Example #18
0
    def handleEvent(self, event):
        """Check whether an external site refers back to the scan target.

        The URL in the event (or "http://" + domain for SIMILARDOMAIN and
        CO_HOSTED_SITE events) is fetched and its content searched for any
        of the target's names surrounded by delimiter characters. If none
        is found and the event is LINKED_URL_EXTERNAL with the 'checkbase'
        option set, the base URL of the site is fetched and searched as
        well. Each URL is only fetched once (tracked in self.fetched).

        On a match an AFFILIATE_INTERNET_NAME event is reported, followed
        by an AFFILIATE_WEB_CONTENT event carrying the fetched content.

        Args:
            event: the incoming event. Its eventType, module, data and
                moduleDataSource attributes are read; moduleDataSource is
                set to "Unknown" on a match if it was empty.

        Returns:
            None
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

        # The SIMILARDOMAIN and CO_HOSTED_SITE events supply domains,
        # not URLs. Assume HTTP.
        if eventName in ['SIMILARDOMAIN', 'CO_HOSTED_SITE']:
            eventData = 'http://' + eventData.lower()

        # We are only interested in external sites for the crossref
        if self.getTarget().matches(self.sf.urlFQDN(eventData)):
            self.sf.debug("Ignoring " + eventData + " as not external")
            return None

        if eventData in self.fetched:
            self.sf.debug("Ignoring " + eventData + " as already tested")
            return None
        self.fetched[eventData] = True

        self.sf.debug("Testing for affiliation: " + eventData)
        res = self.sf.fetchUrl(eventData, timeout=self.opts['_fetchtimeout'],
                               useragent=self.opts['_useragent'], sizeLimit=10000000)

        if res['content'] is None:
            self.sf.debug("Ignoring " + eventData + " as no data returned")
            return None

        matched = False
        url = eventData
        for name in self.getTarget().getNames():
            # Search for mentions of our host/domain in the external site's
            # data. The name is escaped so that characters such as "." are
            # matched literally rather than as regex metacharacters.
            pat = re.compile("([\.\'\/\"\ ]" + re.escape(name) + "[\.\'\/\"\ ])",
                             re.IGNORECASE)
            if pat.search(res['content']):
                matched = True
                break

        # If the name wasn't found in the affiliate, and checkbase is set,
        # fetch the base URL of the affiliate to check for a crossref.
        if not matched and eventName == "LINKED_URL_EXTERNAL" \
                and self.opts['checkbase']:
            url = self.sf.urlBaseUrl(eventData)
            if url in self.fetched:
                return None
            self.fetched[url] = True

            res = self.sf.fetchUrl(url, timeout=self.opts['_fetchtimeout'],
                                   useragent=self.opts['_useragent'],
                                   sizeLimit=10000000)
            if res['content'] is not None:
                for name in self.getTarget().getNames():
                    # NOTE(review): unlike the first search, the trailing
                    # delimiter set here has no "." - kept as found; confirm
                    # whether that difference is intentional.
                    pat = re.compile("([\.\'\/\"\ ]" + re.escape(name) + "[\'\/\"\ ])",
                                     re.IGNORECASE)
                    if pat.search(res['content']):
                        matched = True
                        break

        if matched:
            if not event.moduleDataSource:
                event.moduleDataSource = "Unknown"
            self.sf.info("Found affiliate: " + url)
            evt1 = SpiderFootEvent("AFFILIATE_INTERNET_NAME", self.sf.urlFQDN(url),
                                   self.__name__, event)
            evt1.moduleDataSource = event.moduleDataSource
            self.notifyListeners(evt1)
            evt2 = SpiderFootEvent("AFFILIATE_WEB_CONTENT", res['content'],
                                   self.__name__, evt1)
            evt2.moduleDataSource = event.moduleDataSource
            self.notifyListeners(evt2)
Example #19
0
    def handleEvent(self, event):
        """Find company names in the event data and report them.

        The data is first searched with plain substring lookups for known
        company suffixes (LLC, Ltd, Inc, ...). A small window of text
        around every hit is then matched against a regular expression that
        captures up to three capitalised words followed by the suffix.
        Captured words that look like "Copyright" or a four-digit year are
        dropped from the name.

        Each distinct name is reported as COMPANY_NAME, or as
        AFFILIATE_COMPANY_NAME when the incoming event type contains
        "AFFILIATE_". COMPANY_NAME / AFFILIATE_COMPANY_NAME events are
        never re-parsed, and web content from JS/CSS URLs is skipped when
        the 'filterjscss' option is set.

        Args:
            event: the incoming event. Its eventType, module, data,
                sourceEvent and moduleDataSource attributes are read.

        Returns:
            None
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        # Various ways to identify companies in text
        # Support up to three word company names with each starting with
        # a capital letter, allowing for hyphens brackets and numbers within.
        pattern_prefix = "(?=[,;:\'\">\(= ]|^)\s?([A-Z0-9\(\)][A-Za-z0-9\-&,\.][^ \"\';:><]*)?\s?([A-Z0-9\(\)][A-Za-z0-9\-&,\.]?[^ \"\';:><]*|[Aa]nd)?\s?([A-Z0-9\(\)][A-Za-z0-9\-&,\.]?[^ \"\';:><]*)?\s+"
        # A comma was missing between the N.V. and PLC entries of both
        # lists, which silently merged them into a single bogus entry.
        pattern_match_re = [
            'LLC', 'L\.L\.C\.?', 'AG', 'A\.G\.?', 'GmbH', 'Pty\.?\s+Ltd\.?',
            'Ltd\.?', 'Pte\.?', 'Inc\.?', 'INC\.?', 'Incorporated', 'Foundation',
            'Corp\.?', 'Corporation', 'SA', 'S\.A\.?', 'SIA', 'BV', 'B\.V\.?',
            'NV', 'N\.V\.?', 'PLC', 'Limited', 'Pvt\.?\s+Ltd\.?', 'SARL']
        pattern_match = [
            'LLC', 'L.L.C', 'AG', 'A.G', 'GmbH', 'Pty',
            'Ltd', 'Pte', 'Inc', 'INC', 'Foundation',
            'Corp', 'SA', 'S.A', 'SIA', 'BV', 'B.V',
            'NV', 'N.V', 'PLC', 'Limited', 'Pvt.', 'SARL']

        pattern_suffix = "(?=[ \.,:<\)\'\"]|[$\n\r])"

        # Filter out anything from the company name which matches the below
        filterpatterns = [
            "Copyright",
            "\d{4}"  # To catch years
        ]

        # Size of the text window kept around each suffix hit. The tail has
        # to hold the longest suffix ("Incorporated") plus the delimiter
        # that pattern_suffix requires after it.
        window_before = 50
        window_after = 20

        # Don't re-parse company names
        if eventName in ["COMPANY_NAME", "AFFILIATE_COMPANY_NAME"]:
            return None

        if eventName == "TARGET_WEB_CONTENT":
            url = event.sourceEvent.data
            if self.opts['filterjscss'] and (".js" in url or ".css" in url):
                self.sf.debug("Ignoring web content from CSS/JS.")
                return None

        self.sf.debug("Received event, " + eventName + ", from " + srcModuleName + ": " + str(len(eventData)) + " bytes.")

        if type(eventData) not in [str, unicode]:
            if type(eventData) not in [list, dict]:
                self.sf.debug("Unhandled type to find company names: " +
                              str(type(eventData)))
                return None
            try:
                eventData = str(eventData)
            except Exception as e:
                self.sf.debug("Unable to convert list/dict to string: " + str(e))
                return None

        # Strip out everything before the O=
        if eventName == "SSL_CERTIFICATE_ISSUED":
            parts = eventData.split("O=")
            if len(parts) > 1:
                eventData = parts[1]
            else:
                self.sf.debug("Couldn't strip out O=, proceeding anyway...")

        # Find chunks of text containing what might be a company name first.
        # This is to avoid running very expensive regexps on large chunks of
        # data.
        chunks = list()
        dataLen = len(eventData)
        for pat in pattern_match:
            # str.find returns -1 when there is no hit; 0 is a valid hit.
            m = eventData.find(pat)
            while m != -1:
                start = max(m - window_before, 0)
                # Slice ends are exclusive, so clamp to the full length to
                # keep the final character of the data.
                end = min(m + window_after, dataLen)
                chunks.append(eventData[start:end])
                m = eventData.find(pat, m + len(pat))

        # Compile each suffix expression once rather than once per chunk.
        company_res = [
            re.compile(pattern_prefix + "(" + pat + ")" + pattern_suffix,
                       re.MULTILINE | re.DOTALL)
            for pat in pattern_match_re
        ]

        myres = list()
        for chunk in chunks:
            for company_re in company_res:
                for match in company_re.findall(chunk):
                    # A suffix on its own, with no name words captured, is
                    # not a company name.
                    matched = sum(1 for m in match if len(m) > 0)
                    if matched <= 1:
                        continue

                    fullcompany = ""
                    for m in match:
                        if not any(re.match(f, m) for f in filterpatterns):
                            fullcompany += m + " "

                    fullcompany = re.sub("\s+", " ", fullcompany.strip())

                    self.sf.info("Found company name: " + fullcompany)
                    if fullcompany in myres:
                        self.sf.debug("Already found from this source.")
                        continue
                    myres.append(fullcompany)

                    if "AFFILIATE_" in eventName:
                        etype = "AFFILIATE_COMPANY_NAME"
                    else:
                        etype = "COMPANY_NAME"

                    evt = SpiderFootEvent(etype, fullcompany, self.__name__, event)
                    evt.moduleDataSource = event.moduleDataSource or "Unknown"
                    self.notifyListeners(evt)
Example #20
0
    def handleEvent(self, event):
        """Look for human names in the event data and report them.

        Two strategies are used:

        * For EMAILADDR events, when the 'emailtoname' option is set, a
          dotted local part ("john.smith@...") is turned directly into a
          name ("John Smith") and reported; nothing else is scanned.
        * Otherwise the event data is scanned for pairs of capitalised
          words. Each pair is scored using the dictionary (self.d) and the
          list of popular first names (self.n); pairs scoring above the
          'algotune' option are reported.

        Every name is reported as a HUMAN_NAME event whose moduleDataSource
        is copied from the incoming event ("Unknown" if it has none).

        Args:
            event: the incoming event. Its eventType, module, data and
                moduleDataSource attributes are read.

        Returns:
            None
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

        if eventName == "EMAILADDR" and self.opts['emailtoname']:
            localPart = eventData.split("@")[0]
            if "." in localPart:
                if isinstance(eventData, unicode):
                    name = " ".join(map(unicode.capitalize, localPart.split(".")))
                else:
                    name = " ".join(map(str.capitalize, localPart.split(".")))
                    name = unicode(name, 'utf-8', errors='replace')
                # Notify other modules of what you've found
                evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
                # Carry the data source forward, exactly as the scored
                # names below do; this branch used to leave it unset.
                evt.moduleDataSource = event.moduleDataSource or "Unknown"
                self.notifyListeners(evt)
                return None

        # Stage 1: Find things that look (very vaguely) like names
        rx = re.compile("([A-Z][a-z�������������]+)\s+.?.?\s?([A-Z][�������������a-zA-Z\'\-]+)")
        for r in rx.findall(eventData):
            # Start off each match as 0 points.
            p = 0

            # Shouldn't encounter "Firstname's Secondname"
            first = r[0].lower()
            if "'" in first[-2:]:
                continue

            # Strip off trailing ' or 's
            secondOrig = r[1].replace("'s", "").rstrip("'")
            second = r[1].lower().replace("'s", "").rstrip("'")

            firstInDict = first in self.d
            secondInDict = second in self.d

            if not firstInDict and not secondInDict:
                # Neither word is a dictionary word: add 75 points.
                p += 75
            else:
                # Only the first word is a dictionary word: subtract 20.
                if firstInDict and not secondInDict:
                    p -= 20
                # Only the second word is a dictionary word: subtract 40.
                if secondInDict and not firstInDict:
                    p -= 40

            # If the first word is a known popular first name, award 50 points.
            if first in self.n:
                p += 50

            # If either word is 2 characters, subtract 50 points.
            if len(first) == 2 or len(second) == 2:
                p -= 50

            name = r[0] + " " + secondOrig

            if p > self.opts['algotune']:
                # Notify other modules of what you've found
                evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
                evt.moduleDataSource = event.moduleDataSource or "Unknown"
                self.notifyListeners(evt)
Example #21
0
    def handleEvent(self, event):
        """Search the event data for company names and emit them as events.

        A company name is up to three capitalised words followed by a legal
        suffix (LLC, Ltd, Inc, GmbH, ...). To avoid running the expensive
        regular expressions over large inputs, the data is first scanned with
        plain substring searches and only a small window around each suffix
        occurrence is handed to the regular expressions.

        Emits AFFILIATE_COMPANY_NAME when the received event type contains
        "AFFILIATE_", COMPANY_NAME otherwise. Each distinct name is reported
        once per call. Always returns None.

        NOTE(review): relies on the Python 2 `unicode` builtin.
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        # Various ways to identify companies in text
        # Support up to three word company names with each starting with
        # a capital letter, allowing for hyphens brackets and numbers within.
        pattern_prefix = "(?=[,;:\'\">\(= ]|^)\s?([A-Z0-9\(\)][A-Za-z0-9\-&,\.][^ \"\';:><]*)?\s?([A-Z0-9\(\)][A-Za-z0-9\-&,\.]?[^ \"\';:><]*|[Aa]nd)?\s?([A-Z0-9\(\)][A-Za-z0-9\-&,\.]?[^ \"\';:><]*)?\s+"
        # Regular expressions for the legal suffixes. Every entry needs its
        # own trailing comma: adjacent string literals are concatenated.
        pattern_match_re = [
            'LLC', 'L\.L\.C\.?', 'AG', 'A\.G\.?', 'GmbH', 'Pty\.?\s+Ltd\.?',
            'Ltd\.?', 'Pte\.?', 'Inc\.?', 'INC\.?', 'Incorporated',
            'Foundation', 'Corp\.?', 'Corporation', 'SA', 'S\.A\.?', 'SIA',
            'BV', 'B\.V\.?', 'NV', 'N\.V\.?',
            'PLC', 'Limited', 'Pvt\.?\s+Ltd\.?', 'SARL'
        ]
        # Plain substrings used to cheaply locate candidate regions.
        pattern_match = [
            'LLC', 'L.L.C', 'AG', 'A.G', 'GmbH', 'Pty', 'Ltd', 'Pte', 'Inc',
            'INC', 'Foundation', 'Corp', 'SA', 'S.A', 'SIA', 'BV', 'B.V', 'NV',
            'N.V',
            'PLC', 'Limited', 'Pvt.', 'SARL'
        ]

        pattern_suffix = "(?=[ \.,:<\)\'\"]|[$\n\r])"

        # Filter out anything from the company name which matches the below
        filterpatterns = [
            "Copyright",
            "\d{4}"  # To catch years
        ]

        # Characters of context kept before / after each suffix occurrence.
        # The tail must hold the longest suffix found through a shorter
        # substring ("Incorporated" via "Inc", 12 characters) plus the
        # delimiter that pattern_suffix looks ahead for.
        window_before = 50
        window_after = 15

        # Don't re-parse company names
        if eventName in ["COMPANY_NAME", "AFFILIATE_COMPANY_NAME"]:
            return None

        if eventName == "TARGET_WEB_CONTENT":
            url = event.sourceEvent.data
            if self.opts['filterjscss'] and (".js" in url or ".css" in url):
                self.sf.debug("Ignoring web content from CSS/JS.")
                return None

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName + ": " + str(len(eventData)) + " bytes.")

        if type(eventData) not in [str, unicode]:
            if type(eventData) not in [list, dict]:
                self.sf.debug("Unhandled type to find company names: " +
                              str(type(eventData)))
                return None
            try:
                eventData = str(eventData)
            except Exception as e:
                self.sf.debug("Unable to convert list/dict to string: " +
                              str(e))
                return None

        # Strip out everything before the O=
        if eventName == "SSL_CERTIFICATE_ISSUED":
            try:
                eventData = eventData.split("O=")[1]
            except IndexError:
                self.sf.debug("Couldn't strip out O=, proceeding anyway...")

        # Find chunks of text containing what might be a company name first.
        # This is to avoid running very expensive regexps on large chunks of
        # data.
        chunks = list()
        for pat in pattern_match:
            m = eventData.find(pat)
            # An occurrence at index 0 has no room for a name in front of it,
            # so it is skipped together with "not found" (-1).
            while m > 0:
                # Slicing clamps at the end of the string, so the window may
                # safely run past it without losing the final character.
                chunks.append(
                    eventData[max(0, m - window_before):m + window_after])
                m = eventData.find(pat, m + len(pat))

        if "AFFILIATE_" in eventName:
            etype = "AFFILIATE_COMPANY_NAME"
        else:
            etype = "COMPANY_NAME"

        myres = list()
        for chunk in chunks:
            for pat in pattern_match_re:
                matches = re.findall(
                    pattern_prefix + "(" + pat + ")" + pattern_suffix, chunk,
                    re.MULTILINE | re.DOTALL)
                for match in matches:
                    # A bare suffix with no name words in front is not a
                    # company name.
                    if sum(1 for m in match if len(m) > 0) <= 1:
                        continue

                    kept = [
                        m for m in match
                        if not any(re.match(f, m) for f in filterpatterns)
                    ]
                    fullcompany = re.sub("\s+", " ", " ".join(kept).strip())

                    self.sf.info("Found company name: " + fullcompany)
                    if fullcompany in myres:
                        self.sf.debug("Already found from this source.")
                        continue
                    myres.append(fullcompany)

                    evt = SpiderFootEvent(etype, fullcompany, self.__name__,
                                          event)
                    evt.moduleDataSource = event.moduleDataSource or "Unknown"
                    self.notifyListeners(evt)
Example #22
0
    def handleEvent(self, event):
        """Query the third party for the event data and emit the results.

        For NETBLOCK_* events every address in the network is queried (and an
        IP_ADDRESS event is generated per address that returned data); for
        any other event the event data itself is queried. Each successful
        lookup produces a RAW_RIR_DATA event and, when the record has an
        'os' entry, an OPERATING_SYSTEM event.

        Returns None in all cases; results are delivered through
        self.notifyListeners().
        """
        # The three most used fields in SpiderFootEvent are:
        # event.eventType - the event type, e.g. INTERNET_NAME, IP_ADDRESS, etc.
        # event.module - the name of the module that generated the event, e.g. sfp_dnsresolve
        # event.data - the actual data, e.g. 127.0.0.1. This can sometimes be megabytes in size (e.g. a PDF)
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        # Once we are in this state, return immediately.
        if self.errorState:
            return None

        # Log this before complaining about a missing API key so we know the
        # event was received.
        self.sf.debug("Received event, %s, from %s" %
                      (eventName, srcModuleName))

        # Always check if the API key is set and complain if it isn't, then set
        # self.errorState to avoid this being a continual complaint during the scan.
        if self.opts['api_key'] == "":
            self.sf.error(
                "You enabled sfp_template but did not set an API key!", False)
            self.errorState = True
            return None

        # Don't look up stuff twice
        if eventData in self.results:
            self.sf.debug("Skipping " + eventData + " as already mapped.")
            return None
        else:
            # If eventData might be something large, set the key to a hash
            # of the value instead of the value, to avoid memory abuse.
            self.results[eventData] = True

        if eventName == 'NETBLOCK_OWNER':
            # Note here an example of handling the netblocklookup option
            if not self.opts['netblocklookup']:
                return None
            else:
                if IPNetwork(eventData).prefixlen < self.opts['maxnetblock']:
                    self.sf.debug("Network size bigger than permitted: " +
                                  str(IPNetwork(eventData).prefixlen) + " > " +
                                  str(self.opts['maxnetblock']))
                    return None

        # The list of things to query. It has to be created here, before
        # anything is appended to it below.
        qrylist = list()

        # When handling netblocks/subnets, assuming the user set
        # netblocklookup/subnetlookup to True, we need to expand it
        # to the IPs for looking up.
        if eventName.startswith("NETBLOCK_"):
            for ipaddr in IPNetwork(eventData):
                qrylist.append(str(ipaddr))
                self.results[str(ipaddr)] = True
        else:
            qrylist.append(eventData)

        for addr in qrylist:
            # Perform the query to the third party; in this case for each IP
            # being queried.
            rec = self.query(addr)

            # Handle the response being empty/failing
            if rec is None:
                continue

            # For netblocks, we need to create the IP address event so that
            # the threat intel event is more meaningful and linked to the
            # IP address within the network, not the whole network.
            if eventName.startswith('NETBLOCK_'):
                # This is where the module generates an event for other modules
                # to process and is a fundamental part of the SpiderFoot architecture.
                # We are generating an event of type "IP_ADDRESS" here, the data being
                # the addr variable, the name of the module is the next argument
                # (self.__name__), and finally the event that is linked as the source
                # event of this event. This enables SpiderFoot to link events so users
                # can see what events generated other events, seeing a full chain of
                # discovery from their target to the data returned here.
                pevent = SpiderFootEvent("IP_ADDRESS", addr, self.__name__,
                                         event)
                # With the event created, we can now notify any other modules listening
                # for IP_ADDRESS events (which they define in their watchedEvents()
                # function).
                self.notifyListeners(pevent)
            else:
                # If the event received wasn't a netblock, then use that event
                # as the source event for later events.
                pevent = event

            # When querying a third party API, always ensure to generate
            # a RAW_RIR_DATA event. Note that here we are setting the pevent
            # event as the source for this, since the IP address is actually
            # what was queried against the third party, not the netblock.
            # So now we have NETBLOCK_OWNER (event we received) -> IP_ADDRESS
            # (event we generated above) -> RAW_RIR_DATA (event from the third
            # party about the IP Address we queried).
            evt = SpiderFootEvent("RAW_RIR_DATA", str(rec), self.__name__,
                                  pevent)
            self.notifyListeners(evt)

            # Whenever operating in a loop, call this to check whether the user
            # requested the scan to be aborted.
            if self.checkForStop():
                return None

            # In some cases, you want to override the data source for the event
            # you're producing to be the data source of the event that you've
            # received. This is needed, for example, when the module is purely
            # extracting data from a received event, so the data source is not
            # actually this module, but the data source of the received event
            # itself! sfp_email is a good example, since it is purely looking
            # for e-mail addresses in received content, so an EMAILADDR event
            # should have a data source of whatever place the EMAILADDR was
            # actually found in. This is how you'd achieve that:
            # NOTE(review): as placed, this runs after evt has already been
            # passed to notifyListeners() above - confirm whether it is meant
            # purely as an illustration.
            if event.moduleDataSource:
                evt.moduleDataSource = event.moduleDataSource
            else:
                # This should never happen, but just to be safe since other
                # code might depend on this field existing and not being None.
                evt.moduleDataSource = "Unknown"

            # Note that we are using rec.get('os') instead of rec['os'] - this
            # means we won't get an exception if the 'os' key doesn't exist. In
            # general, you should always use .get() instead of accessing keys
            # directly in case the key doesn't exist.
            os = rec.get('os')
            if os:
                evt = SpiderFootEvent("OPERATING_SYSTEM", f"{os} ({addr})",
                                      self.__name__, pevent)
                self.notifyListeners(evt)
    def handleEvent(self, event):
        """Extract e-mail addresses from the event data and emit them.

        Addresses whose domain (or the address itself) matches the scan
        target are emitted as EMAILADDR, all others as AFFILIATE_EMAILADDR.
        External addresses found in RAW_RIR_DATA are dropped as noise. Each
        distinct address is reported once per call. Always returns None.

        NOTE(review): relies on the Python 2 `unicode` builtin.
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        # Don't re-parse e-mail addresses
        if "EMAILADDR" in eventName:
            return None

        # Ignore any web content that isn't from the target. This avoids noise from
        # pastebin and other content where unrelated e-mails are likely to be found.
        if "_CONTENT" in eventName and eventName != "TARGET_WEB_CONTENT":
            return None

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)

        if type(eventData) not in [str, unicode]:
            if type(eventData) not in [list, dict]:
                self.sf.debug("Unhandled type to find e-mails: " +
                              str(type(eventData)))
                return None
            try:
                eventData = str(eventData)
            except Exception as e:
                self.sf.debug("Unable to convert list/dict to string: " +
                              str(e))
                return None

        pat = re.compile(
            "([\%a-zA-Z\.0-9_\-]+@[a-zA-Z\.0-9\-]+\.[a-zA-Z\.0-9\-]+)")
        myres = list()
        for match in re.findall(pat, eventData):
            evttype = "EMAILADDR"
            if len(match) < 4:
                self.sf.debug("Likely invalid address: " + match)
                continue

            # Handle messed up encodings
            if "%" in match:
                self.sf.debug("Skipped address: " + match)
                continue

            # Get the domain and strip potential ending .
            mailDom = match.lower().split('@')[1].strip('.')
            target = self.getTarget()
            if not target.matches(mailDom) and not target.matches(match):
                self.sf.debug("External domain, so possible affiliate e-mail")
                # Raw RIR data returning external e-mails generates way
                # too much noise. Only this address is skipped: later
                # matches may still belong to the target.
                if eventName == "RAW_RIR_DATA":
                    continue
                evttype = "AFFILIATE_EMAILADDR"

            self.sf.info("Found e-mail address: " + match)
            if type(match) == str:
                mail = unicode(match.strip('.'), 'utf-8', errors='replace')
            else:
                mail = match.strip('.')

            if mail in myres:
                self.sf.debug("Already found from this source.")
                continue
            myres.append(mail)

            evt = SpiderFootEvent(evttype, mail, self.__name__, event)
            evt.moduleDataSource = event.moduleDataSource or "Unknown"
            self.notifyListeners(evt)

        return None
Example #24
0
    def handleEvent(self, event):
        """Search the event data for company names and emit COMPANY_NAME events.

        A company name is up to three capitalised words followed by a legal
        suffix (LLC, Ltd, Inc, GmbH, ...). Each distinct name is reported
        once per call. Always returns None.

        NOTE(review): relies on the Python 2 `unicode` builtin.
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        # Various ways to identify companies in text
        # Support up to three word company names with each starting with
        # a capital letter, allowing for hyphens brackets and numbers within.
        pattern_prefix = "(?=[,;:\'\">\(= ]|^)\s?([A-Z0-9\(\)][A-Za-z0-9\-&,][^ \"\';:><]*)?\s?([A-Z0-9\(\)][A-Za-z0-9\-&,]?[^ \"\';:><]*|[Aa]nd)?\s?([A-Z0-9\(\)][A-Za-z0-9\-&,]?[^ \"\';:><]*)?\s+"
        # Every entry needs its own trailing comma: adjacent string literals
        # are silently concatenated into a single pattern.
        pattern_match = [
            'LLC', 'L\.L\.C\.?', 'AG', 'A\.G\.?', 'GmbH', 'Pty\.?\s+Ltd\.?',
            'Ltd\.?', 'Pte\.?', 'Inc\.?', 'INC\.?', 'Incorporated',
            'Foundation', 'Corp\.?', 'Corporation', 'SA', 'S\.A\.?', 'SIA',
            'BV', 'B\.V\.?', 'NV', 'N\.V\.?',
            'PLC', 'Limited', 'Pvt\.?\s+Ltd\.?', 'SARL'
        ]
        pattern_suffix = "(?=[ \.,:<\)\'\"]|$)"

        # Filter out anything from the company name which matches the below
        filterpatterns = [
            "Copyright",
            "\d{4}"  # To catch years
        ]

        # Don't re-parse company names
        if eventName == "COMPANY_NAME":
            return None

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName + ": " + str(len(eventData)) + " bytes.")

        if type(eventData) not in [str, unicode]:
            if type(eventData) not in [list, dict]:
                self.sf.debug("Unhandled type to find company names: " +
                              str(type(eventData)))
                return None
            try:
                eventData = str(eventData)
            except Exception as e:
                self.sf.debug("Unable to convert list/dict to string: " +
                              str(e))
                return None

        # Strip out everything before the O=
        if eventName == "SSL_CERTIFICATE_ISSUED":
            try:
                eventData = eventData.split("O=")[1]
            except IndexError:
                self.sf.debug("Couldn't strip out O=, proceeding anyway...")

        myres = list()
        for pat in pattern_match:
            matches = re.findall(
                pattern_prefix + "(" + pat + ")" + pattern_suffix, eventData,
                re.MULTILINE)
            for match in matches:
                # A bare suffix with no name words in front is not a
                # company name.
                if sum(1 for m in match if len(m) > 0) <= 1:
                    continue

                kept = [
                    m for m in match
                    if not any(re.match(f, m) for f in filterpatterns)
                ]
                fullcompany = re.sub("\s+", " ", " ".join(kept).strip())

                self.sf.info("Found company name: " + fullcompany)
                if fullcompany in myres:
                    self.sf.debug("Already found from this source.")
                    continue
                myres.append(fullcompany)

                evt = SpiderFootEvent("COMPANY_NAME", fullcompany,
                                      self.__name__, event)
                evt.moduleDataSource = event.moduleDataSource or "Unknown"
                self.notifyListeners(evt)