Example #1
0
    def scanviz(self, id, gexf="0"):
        """Export the entities of one scan as graph data for visualisation.

        Args:
            id (str): scan ID
            gexf (str): "0" returns the graph built by buildGraphJson; any
                other value returns the graph built by buildGraphGexf and
                sets the response headers for a file download

        Returns:
            Graph data in the requested format, or None when the scan ID is
            empty or no scan instance is found for it.
        """

        if not id:
            return None

        db = SpiderFootDb(self.config)
        events = db.scanResultEvent(id, filterFp=True)
        instance = db.scanInstanceGet(id)

        if not instance:
            return None

        # Element 1 of the scan instance record is used as the graph root.
        roots = [instance[1]]

        if gexf != "0":
            headers = cherrypy.response.headers
            headers['Content-Disposition'] = "attachment; filename=SpiderFoot.gexf"
            headers['Content-Type'] = "application/gexf"
            headers['Pragma'] = "no-cache"
            return SpiderFootHelpers.buildGraphGexf(roots, "SpiderFoot Export", events)

        return SpiderFootHelpers.buildGraphJson(roots, events)
 def test_target_type(self):
     """
     Test targetTypeFromString(target) with one sample per expected type
     """
     # (target string, expected type) pairs, checked in this order.
     samples = (
         ("0.0.0.0", 'IP_ADDRESS'),
         ("*****@*****.**", 'EMAILADDR'),
         ("0.0.0.0/0", 'NETBLOCK_OWNER'),
         ("+1234567890", 'PHONE_NUMBER'),
         ('"Human Name"', 'HUMAN_NAME'),
         ('"abc123"', 'USERNAME'),
         ("1234567890", 'BGP_AS_OWNER'),
         ("::1", 'IPV6_ADDRESS'),
         ("spiderfoot.net", 'INTERNET_NAME'),
         ("1HesYJSP1QqcyPEjnQ9vzBL1wujruNGe7R", 'BITCOIN_ADDRESS'),
     )
     for target, expected in samples:
         detected = SpiderFootHelpers.targetTypeFromString(target)
         self.assertEqual(expected, detected)
 def test_buildGraphData_data_row_with_invalid_number_of_columns_should_raise_ValueError(self):
     """
     Test buildGraphData(data, flt=list()) raises ValueError for a one-column row
     """
     short_rows = [['only one column']]
     self.assertRaises(ValueError, SpiderFootHelpers.buildGraphData, short_rows)
Example #4
0
    def rerunscan(self, id):
        """Start a new scan that reuses the name, target and modules of an existing scan.

        Args:
            id (str): scan ID

        Returns:
            The result of self.error() when the scan cannot be looked up or
            started; on success the method does not return (see Raises).

        Raises:
            HTTPRedirect: redirect to info page for new scan
        """

        # The new scan runs against a snapshot of the current configuration
        cfg = deepcopy(self.config)
        dbh = SpiderFootDb(cfg)

        previous = dbh.scanInstanceGet(id)
        if not previous:
            return self.error("Invalid scan ID.")

        scanname = previous[0]
        scantarget = previous[1]

        stored_config = dbh.scanConfigGet(id)
        if not stored_config:
            return self.error(f"Error loading config from scan: {id}")

        # Reuse the modules of the previous scan, minus the stdout storage module
        modlist = stored_config['_modulesenabled'].split(',')
        try:
            modlist.remove("sfp__stor_stdout")
        except ValueError:
            pass

        # A stored target that is not recognised as-is is assumed to be a
        # name, so a second detection attempt is made with the value quoted.
        targetType = (
            SpiderFootHelpers.targetTypeFromString(scantarget)
            or SpiderFootHelpers.targetTypeFromString(f'"{scantarget}"')
        )

        if targetType not in ("HUMAN_NAME", "BITCOIN_ADDRESS"):
            scantarget = scantarget.lower()

        # Launch the new scan in its own daemon process
        scanId = SpiderFootHelpers.genScanInstanceId()
        try:
            scan_proc = mp.Process(
                target=SpiderFootScanner,
                args=(scanname, scanId, scantarget, targetType, modlist, cfg),
            )
            scan_proc.daemon = True
            scan_proc.start()
        except Exception as e:
            failure = f"[-] Scan [{scanId}] failed: {e}"
            self.log.error(failure)
            return self.error(failure)

        # Poll the database until the new scan instance shows up
        while dbh.scanInstanceGet(scanId) is None:
            self.log.info("Waiting for the scan to initialize...")
            time.sleep(1)

        raise cherrypy.HTTPRedirect(f"{self.docroot}/scaninfo?id={scanId}", status=302)
 def test_buildGraphData_invalid_data_type_should_raise_TypeError(self):
     """
     Test buildGraphData(data, flt=list()) raises TypeError for None, str, dict and int data
     """
     for bad_value in (None, "", dict(), int()):
         with self.subTest(invalid_type=bad_value), self.assertRaises(TypeError):
             SpiderFootHelpers.buildGraphData(bad_value)
 def test_dataParentChildToTree_invalid_data_type_should_return_TypeError(self):
     """
     Test dataParentChildToTree(data) raises TypeError for None, str, list and int data
     """
     for bad_value in (None, "", list(), int()):
         with self.subTest(invalid_type=bad_value), self.assertRaises(TypeError):
             SpiderFootHelpers.dataParentChildToTree(bad_value)
Example #7
0
    def rerunscanmulti(self, ids):
        """Rerun several scans, each as a new scan with the original name, target and modules.

        Args:
            ids (str): comma separated list of scan IDs

        Returns:
            str: the rendered scan list page once every scan has been
                started, or the result of self.error() as soon as one of
                the scans cannot be looked up or started
        """

        # Snapshot the current configuration to be used by the scans
        cfg = deepcopy(self.config)
        dbh = SpiderFootDb(cfg)

        for scan_id in ids.split(","):
            info = dbh.scanInstanceGet(scan_id)
            if not info:
                return self.error("Invalid scan ID.")

            scanname = info[0]
            scantarget = info[1]

            # Truthiness check also covers a None result, which len() would
            # turn into a TypeError.
            scanconfig = dbh.scanConfigGet(scan_id)
            if not scanconfig:
                return self.error("Something went wrong internally.")

            modlist = scanconfig['_modulesenabled'].split(',')
            if "sfp__stor_stdout" in modlist:
                modlist.remove("sfp__stor_stdout")

            targetType = SpiderFootHelpers.targetTypeFromString(scantarget)
            if not targetType:
                # Same fallback as rerunscan: a stored target that is not
                # recognised as-is is assumed to be a name, so quote it and
                # try to determine the target type again.
                targetType = SpiderFootHelpers.targetTypeFromString(f'"{scantarget}"')

            if not targetType:
                # Should never be triggered for a re-run scan..
                return self.error("Invalid target type. Could not recognize it as a target SpiderFoot supports.")

            # Start running a new scan
            scanId = SpiderFootHelpers.genScanInstanceId()
            try:
                p = mp.Process(target=SpiderFootScanner, args=(scanname, scanId, scantarget, targetType, modlist, cfg))
                p.daemon = True
                p.start()
            except Exception as e:
                self.log.error(f"[-] Scan [{scanId}] failed: {e}")
                return self.error(f"[-] Scan [{scanId}] failed: {e}")

            # Wait until the scan has initialized before starting the next one
            while dbh.scanInstanceGet(scanId) is None:
                self.log.info("Waiting for the scan to initialize...")
                time.sleep(1)

        templ = Template(filename='spiderfoot/templates/scanlist.tmpl', lookup=self.lookup)
        return templ.render(rerunscans=True, docroot=self.docroot, pageid="SCANLIST", version=__version__)
    def test_extractCreditCardsFromText_should_return_a_list(self):
        """
        Test extractCreditCardsFromText(text) always returns a list
        """
        for invalid_type in (None, "", list(), dict()):
            with self.subTest(invalid_type=invalid_type):
                self.assertIsInstance(
                    SpiderFootHelpers.extractCreditCardsFromText(invalid_type),
                    list)

        sample_text = "spiderfoot4111 1111 1111 1111spiderfoot"
        found = SpiderFootHelpers.extractCreditCardsFromText(sample_text)
        self.assertIsInstance(found, list)
        self.assertEqual(["4111111111111111"], found)
    def test_target_type_invalid_seed_should_return_none(self):
        """
        Test targetTypeFromString(target) returns None for None, empty and empty-quoted input
        """
        for seed in (None, "", '""'):
            detected = SpiderFootHelpers.targetTypeFromString(seed)
            self.assertEqual(None, detected)
    def test_extractUrlsFromText_should_extract_urls_from_string(self):
        """
        Test extractUrlsFromText(text) returns a list containing the embedded URLs
        """
        for invalid_type in (None, "", list(), dict()):
            with self.subTest(invalid_type=invalid_type):
                extracted = SpiderFootHelpers.extractUrlsFromText(invalid_type)
                self.assertIsInstance(extracted, list)

        text = "abchttps://example.spiderfoot.net/path\rabchttp://example.spiderfoot.net:1337/path\rabc"
        urls = SpiderFootHelpers.extractUrlsFromText(text)
        self.assertIsInstance(urls, list)
        for expected_url in ("https://example.spiderfoot.net/path",
                             "http://example.spiderfoot.net:1337/path"):
            self.assertIn(expected_url, urls)
    def test_validLEI_should_return_a_boolean(self):
        """
        Test validLEI(lei) returns a bool for invalid types and sample LEIs
        """
        for invalid_type in (None, "", list(), dict(), int()):
            with self.subTest(invalid_type=invalid_type):
                result = SpiderFootHelpers.validLEI(invalid_type)
                self.assertIsInstance(result, bool)
                self.assertFalse(result)

        # (LEI, assertion expected to hold for the result), checked in order.
        cases = (
            ('7ZW8QJWVPR4P1J1KQYZZ', self.assertFalse),
            ('7ZW8QJWVPR4P1J1KQY45', self.assertTrue),
        )
        for lei, check in cases:
            result = SpiderFootHelpers.validLEI(lei)
            self.assertIsInstance(result, bool)
            check(result)
    def test_extractUrlsFromRobotsTxt_should_return_list(self):
        """
        Test extractUrlsFromRobotsTxt(robotsTxtData) always returns a list
        """
        for invalid_type in (None, "", list(), dict()):
            with self.subTest(invalid_type=invalid_type):
                self.assertIsInstance(
                    SpiderFootHelpers.extractUrlsFromRobotsTxt(invalid_type),
                    list)

        no_path = SpiderFootHelpers.extractUrlsFromRobotsTxt("disallow:")
        self.assertIsInstance(no_path, list)
        self.assertFalse(no_path)

        with_path = SpiderFootHelpers.extractUrlsFromRobotsTxt("disallow: /disallowed/path\n")
        self.assertIsInstance(with_path, list)
        self.assertIn("/disallowed/path", with_path)
Example #13
0
    def detectCountryFromPhone(self, srcPhoneNumber: str) -> str:
        """Resolve the country name for a phone number via its region code.

        Args:
            srcPhoneNumber (str): phone number

        Returns:
            str: country name, or None when the argument is not a string,
                cannot be parsed, or no region code is found for it
        """

        if not isinstance(srcPhoneNumber, str):
            return None

        try:
            parsed = phonenumbers.parse(srcPhoneNumber)
        except Exception:
            self.debug(f"Skipped invalid phone number: {srcPhoneNumber}")
            return None

        try:
            region = region_code_for_country_code(parsed.country_code)
        except Exception:
            self.debug(f"Lookup of region code failed for phone number: {srcPhoneNumber}")
            return None

        if region:
            return SpiderFootHelpers.countryNameFromCountryCode(region.upper())

        return None
Example #14
0
    def scanvizmulti(self, ids, gexf="1"):
        """Export entities results from multiple scans in GEXF format

        Args:
            ids (str): comma separated list of scan IDs
            gexf (str): "0" selects the non-GEXF output, which is not
                implemented for multiple scans; any other value selects GEXF

        Returns:
            GEXF data built from the combined results, or None when ids is
            empty or gexf is "0".
        """
        # Both cases produce no output, so return before opening the
        # database or running any queries.
        if not ids or gexf == "0":
            return None

        dbh = SpiderFootDb(self.config)
        data = list()
        roots = list()

        for scan_id in ids.split(','):
            # extend() grows the list in place instead of copying it for
            # every scan.
            data.extend(dbh.scanResultEvent(scan_id, filterFp=True))
            scan = dbh.scanInstanceGet(scan_id)
            if scan:
                # Element 1 of the scan instance record is used as a graph root.
                roots.append(scan[1])

        cherrypy.response.headers['Content-Disposition'] = "attachment; filename=SpiderFoot.gexf"
        cherrypy.response.headers['Content-Type'] = "application/gexf"
        cherrypy.response.headers['Pragma'] = "no-cache"
        return SpiderFootHelpers.buildGraphGexf(roots, "SpiderFoot Export", data)
Example #15
0
    def scanelementtypediscovery(self, id, eventType):
        """Scan element type discovery.

        Args:
            id (str): scan ID
            eventType (str): filter by event type

        Returns:
            dict: 'tree' holds the result of dataParentChildToTree() and
                'data' holds the data map from scanElementSourcesAll();
                an empty dict is returned when the database lookups fail
        """
        dbh = SpiderFootDb(self.config)
        retdata = dict()

        # Get the events we will be tracing back from
        try:
            leafSet = dbh.scanResultEvent(id, eventType)
            datamap, pc = dbh.scanElementSourcesAll(id, leafSet)
        except Exception:
            return retdata

        # Drop the ROOT key as it adds no value from a viz perspective.
        # pop() with a default avoids a KeyError when the key is absent.
        pc.pop('ROOT', None)
        retdata['tree'] = SpiderFootHelpers.dataParentChildToTree(pc)
        retdata['data'] = datamap

        return retdata
 def test_countryNameFromTld_argument_tld_should_return_country_as_a_string(self):
     """
     Test countryNameFromTld(tld) returns "United States" for the sampled TLDs
     """
     for tld in ('com', 'net', 'org', 'gov', 'mil'):
         with self.subTest(tld=tld):
             country = SpiderFootHelpers.countryNameFromTld(tld)
             self.assertIsInstance(country, str)
             self.assertEqual(country, "United States")
 def test_extractEmailsFromText_should_return_list_of_emails_from_string(self):
     """
     Test extractEmailsFromText(text) returns a list containing the expected addresses
     """
     markup = "<html><body><p>From:[email protected]</p><p>Subject:Hello [email protected], here's some text</p></body></html>"
     emails = SpiderFootHelpers.extractEmailsFromText(markup)
     self.assertIsInstance(emails, list)
     for address in ('*****@*****.**', '*****@*****.**'):
         self.assertIn(address, emails)
    def test_buildGraphData_should_return_a_set(self):
        """
        Test buildGraphData(data, flt=list()) returns a set for a 15-column row
        """
        row = ["test"] * 15
        graph_data = SpiderFootHelpers.buildGraphData([row])
        self.assertIsInstance(graph_data, set)

        # Placeholder assertion; it compares two identical literals.
        self.assertEqual('TBD', 'TBD')
Example #19
0
    def test_parse_robots_txt_should_return_list(self):
        """
        Test parseRobotsTxt(robotsTxtData) always returns a list
        """
        for invalid_type in (None, "", list(), dict()):
            with self.subTest(invalid_type=invalid_type):
                self.assertIsInstance(
                    SpiderFootHelpers.parseRobotsTxt(invalid_type), list)

        no_path = SpiderFootHelpers.parseRobotsTxt("disallow:")
        self.assertIsInstance(no_path, list)
        self.assertFalse(no_path)

        with_path = SpiderFootHelpers.parseRobotsTxt("disallow: /disallowed/path\n")
        self.assertIsInstance(with_path, list)
        self.assertIn("/disallowed/path", with_path)
Example #20
0
    def test_dataParentChildToTree_should_return_dict(self):
        """
        Test dataParentChildToTree(data) returns a dict for invalid types and for dict input
        """
        for invalid_type in (None, "", list(), int()):
            with self.subTest(invalid_type=invalid_type):
                self.assertIsInstance(
                    SpiderFootHelpers.dataParentChildToTree(invalid_type),
                    dict)

        # An empty mapping first, then a nested parent/child mapping.
        for mapping in (dict(), {"test": {"123": "456"}}):
            tree = SpiderFootHelpers.dataParentChildToTree(mapping)
            self.assertIsInstance(tree, dict)
 def test_dataParentChildToTree_should_return_dict(self):
     """
     Test dataParentChildToTree(data) returns a dict for a nested mapping
     """
     parent_child = {"test": {"123": "456"}}
     self.assertIsInstance(
         SpiderFootHelpers.dataParentChildToTree(parent_child), dict)
    def test_buildGraphGexf_should_return_bytes(self):
        """
        Test buildGraphGexf(root, title, data, flt=[]) returns bytes
        """
        # 15 columns, with "ENTITY" at index 11.
        row = ["test"] * 11 + ["ENTITY"] + ["test"] * 3
        gexf = SpiderFootHelpers.buildGraphGexf('test root', 'test title', [row])
        self.assertIsInstance(gexf, bytes)

        # Placeholder assertion; it compares two identical literals.
        self.assertEqual('TBD', 'TBD')
    def test_buildGraphJson_should_return_a_string(self):
        """
        Test buildGraphJson(root, data, flt=list()) returns a str
        """
        # 15 columns, with "ENTITY" at index 11.
        row = ["test"] * 11 + ["ENTITY"] + ["test"] * 3
        graph_json = SpiderFootHelpers.buildGraphJson('test root', [row])
        self.assertIsInstance(graph_json, str)

        # Placeholder assertion; it compares two identical literals.
        self.assertEqual('TBD', 'TBD')
    def test_validPhoneNumber_should_return_a_boolean(self):
        """
        Test validPhoneNumber(phone) returns a bool for invalid types and sample numbers
        """
        for invalid_type in (None, "", list(), dict(), int()):
            with self.subTest(invalid_type=invalid_type):
                result = SpiderFootHelpers.validPhoneNumber(invalid_type)
                self.assertIsInstance(result, bool)
                self.assertFalse(result)

        # (number, assertion expected to hold for the result), checked in order.
        cases = (
            ('+1234567890', self.assertFalse),
            ('+12345678901234567890', self.assertFalse),
            ('+12345678901', self.assertTrue),
        )
        for number, check in cases:
            result = SpiderFootHelpers.validPhoneNumber(number)
            self.assertIsInstance(result, bool)
            check(result)
 def test_extractPgpKeysFromText_should_return_list_of_pgp_keys_from_string(
         self):
     """
     Test extractPgpKeysFromText(text) returns exactly the two key blocks embedded in the text
     """
     # key1 is a PUBLIC KEY block and key2 a PRIVATE KEY block.
     key1 = "-----BEGIN PGP PUBLIC KEY BLOCK-----Version: software v1.2.3\nComment: sample comment\n\nmQINBFRUAGoBEACuk6ze2V2pZtScf1Ul25N2CX19AeL7sVYwnyrTYuWdG2FmJx4x\nDLTLVUazp2AEm/JhskulL/7VCZPyg7ynf+o20Tu9/6zUD7p0rnQA2k3Dz+7dKHHh\neEsIl5EZyFy1XodhUnEIjel2nGe6f1OO7Dr3UIEQw5JnkZyqMcbLCu9sM2twFyfa\na8JNghfjltLJs3/UjJ8ZnGGByMmWxrWQUItMpQjGr99nZf4L+IPxy2i8O8WQewB5\nfvfidBGruUYC+mTw7CusaCOQbBuZBiYduFgH8hRW97KLmHn0xzB1FV++KI7syo8q\nXGo8Un24WP40IT78XjKO\n=nUop\n-----END PGP PUBLIC KEY BLOCK-----"
     key2 = "-----BEGIN PGP PRIVATE KEY BLOCK-----Version: software v1.2.3\nComment: sample comment\n\nmQINBFRUAGoBEACuk6ze2V2pZtScf1Ul25N2CX19AeL7sVYwnyrTYuWdG2FmJx4x\nDLTLVUazp2AEm/JhskulL/7VCZPyg7ynf+o20Tu9/6zUD7p0rnQA2k3Dz+7dKHHh\neEsIl5EZyFy1XodhUnEIjel2nGe6f1OO7Dr3UIEQw5JnkZyqMcbLCu9sM2twFyfa\na8JNghfjltLJs3/UjJ8ZnGGByMmWxrWQUItMpQjGr99nZf4L+IPxy2i8O8WQewB5\nfvfidBGruUYC+mTw7CusaCOQbBuZBiYduFgH8hRW97KLmHn0xzB1FV++KI7syo8q\nXGo8Un24WP40IT78XjKO\n=nUop\n-----END PGP PRIVATE KEY BLOCK-----"
     # Both keys are wrapped in HTML with text directly before and after
     # each block.
     keys = SpiderFootHelpers.extractPgpKeysFromText(
         f"<html><body><p>sample{key1}sample</p><p>sample{key2}sample</p></body></html>"
     )
     self.assertIsInstance(keys, list)
     self.assertIn(key1, keys)
     self.assertIn(key2, keys)
     # Nothing other than the two keys may be returned.
     self.assertEqual(len(keys), 2)
    def test_sanitise_input(self):
        """
        Test sanitiseInput(self, cmd) returns a bool with the expected truth value
        """
        # (input, expected truth value of the result), checked in order.
        cases = (
            ("example-string", True),
            ("example-string\n", False),
            ("example string", False),
            ("-example-string", False),
            ("..example-string", False),
            ("12", False),
        )
        for cmd, expected in cases:
            safe = SpiderFootHelpers.sanitiseInput(cmd)
            self.assertIsInstance(safe, bool)
            if expected:
                self.assertTrue(safe)
            else:
                self.assertFalse(safe)
    def test_valid_email_should_return_a_boolean(self):
        """
        Test validEmail(email) returns a bool for invalid types and sample addresses
        """
        for invalid_type in (None, "", list(), dict()):
            with self.subTest(invalid_type=invalid_type):
                result = SpiderFootHelpers.validEmail(invalid_type)
                self.assertIsInstance(result, bool)
                self.assertFalse(result)

        # (address, assertion expected to hold for the result), checked in order.
        cases = (
            ('*****@*****.**', self.assertFalse),
            ('*****@*****.**', self.assertFalse),
            ('[email protected]\n.com', self.assertFalse),
            ('root@localhost', self.assertFalse),
            ('*****@*****.**', self.assertTrue),
        )
        for address, check in cases:
            result = SpiderFootHelpers.validEmail(address)
            self.assertIsInstance(result, bool)
            check(result)
Example #28
0
    def handleEvent(self, event):
        """Look up e-mail addresses for the event data on email-format.com.

        Event data that has been seen before is ignored. The page for the
        event data is fetched, e-mail addresses are extracted from its
        table body (or the raw page when there is none), and an event is
        emitted for every address that matches the scan target and is not
        masked.

        Args:
            event: received event; its eventType, module and data are used
        """
        event_type = event.eventType
        source_module = event.module
        eventData = event.data

        # Each distinct piece of event data is handled only once
        if eventData in self.results:
            return

        self.results[eventData] = True

        self.debug(f"Received event, {event_type}, from {source_module}")

        # Get e-mail addresses on this domain
        page = self.sf.fetchUrl(
            f"https://www.email-format.com/d/{eventData}/",
            timeout=self.opts['_fetchtimeout'],
            useragent=self.opts['_useragent'],
        )

        content = page['content']
        if content is None:
            return

        soup = BeautifulSoup(content, features="lxml")
        if not soup:
            return

        # Prefer the table body; fall back to raw page contents
        table_body = soup.find('tbody')
        text = str(table_body.contents) if table_body else content

        for email in SpiderFootHelpers.extractEmailsFromText(text):
            domain = email.lower().split('@')[1]

            # Skip unrelated emails first, then masked emails
            if (not self.getTarget().matches(domain)
                    or re.match(r"^[0-9a-f]{8}\.[0-9]{7}@", email)):
                self.debug(f"Skipped address: {email}")
                continue

            self.info(f"Found e-mail address: {email}")

            local_part = email.split("@")[0]
            is_generic = local_part in self.opts['_genericusers'].split(",")
            evttype = "EMAILADDR_GENERIC" if is_generic else "EMAILADDR"

            self.notifyListeners(
                SpiderFootEvent(evttype, email, self.__name__, event))
Example #29
0
 def checkValidity(self, junkUrl):
     """Check that the host answers 404 for a file that should not exist.

     A random number is appended to junkUrl and the resulting URL is
     requested with a HEAD-only fetch.

     Args:
         junkUrl (str): URL prefix of the junk file

     Returns:
         bool: True when the response code is "404"; otherwise False,
             and the base URL of junkUrl is recorded in self.skiphosts
     """
     # Try and fetch an obviously missing version of the junk file
     suffix = str(random.SystemRandom().randint(0, 99999999))
     probe = self.sf.fetchUrl(junkUrl + suffix,
                              headOnly=True,
                              timeout=self.opts['_fetchtimeout'],
                              useragent=self.opts['_useragent'],
                              verify=False)

     if probe['code'] == "404":
         return True

     self.skiphosts[SpiderFootHelpers.urlBaseUrl(junkUrl)] = True
     return False
Example #30
0
    def detectCountryFromIBAN(self, srcIBAN: str) -> str:
        """Resolve the country name from the first two characters of an IBAN.

        Args:
            srcIBAN (str): IBAN

        Returns:
            str: country name, or None when srcIBAN is not a string
        """
        if isinstance(srcIBAN, str):
            country_code = srcIBAN[:2]
            return SpiderFootHelpers.countryNameFromCountryCode(country_code)

        return None