Python SearchCensys Examples

Programming Language: Python

Namespace/Package Name: theHarvester.discovery.censys

Method/Function: SearchCensys

Examples at hotexamples.com: 2

Python SearchCensys - 2 examples found. These are the top rated real world Python examples of theHarvester.discovery.censys.SearchCensys extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: __main__.py Project: scaramucciaXx/theHarvester

def start():
    """Run theHarvester's command-line workflow, from argument parsing to reporting.

    The function:

    1. Parses the command line (target domain, result limit, sources, and the
       optional DNS / port-scan / virtual-host / Shodan / report switches).
    2. Queries every engine listed in ``-b/--source``, accumulating e-mails,
       host names and IP addresses and persisting them via ``stash``.
    3. Prints the aggregated results and resolves the hosts found.
    4. Runs the optional post-processing steps that were requested.
    5. When ``-f/--filename`` is given, writes an HTML report to that path and
       an XML dump next to it (same path, ``.xml`` extension).

    Returns:
        None.

    Raises:
        SystemExit: status 1 for an unsupported source or a failed HTML
            report; status 0 after the report files have been handled.
    """
    # Function-scope imports keep this block self-contained; they are only
    # needed for the XML report written at the end.
    import os
    from xml.sax.saxutils import escape

    parser = argparse.ArgumentParser(
        description=
        'theHarvester is used to gather open source intelligence (OSINT) on a\n'
        'company or domain.')
    parser.add_argument('-d',
                        '--domain',
                        help='company name or domain to search',
                        required=True)
    parser.add_argument('-l',
                        '--limit',
                        help='limit the number of search results, default=500',
                        default=500,
                        type=int)
    parser.add_argument('-S',
                        '--start',
                        help='start with result number X, default=0',
                        default=0,
                        type=int)
    parser.add_argument('-g',
                        '--google-dork',
                        help='use Google Dorks for Google search',
                        default=False,
                        action='store_true')
    parser.add_argument(
        '-p',
        '--port-scan',
        help=
        'scan the detected hosts and check for Takeovers (21,22,80,443,8080)',
        default=False,
        action='store_true')
    parser.add_argument('-s',
                        '--shodan',
                        help='use Shodan to query discovered hosts',
                        default=False,
                        action='store_true')
    parser.add_argument(
        '-v',
        '--virtual-host',
        help='verify host name via DNS resolution and search for virtual hosts',
        action='store_const',
        const='basic',
        default=False)
    parser.add_argument('-e',
                        '--dns-server',
                        help='DNS server to use for lookup')
    parser.add_argument(
        '-t',
        '--dns-tld',
        help='perform a DNS TLD expansion discovery, default False',
        default=False)
    parser.add_argument('-n',
                        '--dns-lookup',
                        help='enable DNS server lookup, default False',
                        default=False,
                        action='store_true')
    parser.add_argument('-c',
                        '--dns-brute',
                        help='perform a DNS brute force on the domain',
                        default=False,
                        action='store_true')
    parser.add_argument('-f',
                        '--filename',
                        help='save the results to an HTML and/or XML file',
                        default='',
                        type=str)
    parser.add_argument(
        '-b',
        '--source',
        help='''baidu, bing, bingapi, censys, crtsh, dnsdumpster,
                        dogpile, duckduckgo, github-code, google,
                        hunter, intelx,
                        linkedin, linkedin_links, netcraft, securityTrails, threatcrowd,
                        trello, twitter, vhost, virustotal, yahoo''')

    args = parser.parse_args()
    try:
        db = stash.stash_manager()
        db.do_init()
    except Exception as e:
        # Database initialisation stays best-effort, but the failure is no
        # longer hidden from the user.
        print(
            f'\033[93m[!] Could not initialise the results database: {e} \033[0m'
        )

    all_emails = []
    all_hosts = []
    all_ip = []
    dnsbrute = args.dns_brute
    dnslookup = args.dns_lookup
    dnsserver = args.dns_server
    # '--dns-tld' takes a value (it has no store_true action), so argparse
    # yields a string and never the literal True. Normalise it to a bool so
    # that passing the option actually enables the expansion.
    dnstld = str(args.dns_tld).strip().lower() not in ('', 'false', '0', 'no',
                                                       'none')
    filename = args.filename  # type: str
    full = []
    google_dorking = args.google_dork
    host_ip = []
    limit = args.limit  # type: int
    ports_scanning = args.port_scan
    shodan = args.shodan
    start = args.start  # type: int
    trello_info = ([], False)
    vhost = []
    virtual = args.virtual_host
    word = args.domain  # type: str

    if args.source is not None:
        engines = set(map(str.strip, args.source.split(',')))

        if not engines.issubset(Core.get_supportedengines()):
            print('\033[93m[!] Invalid source.\n\n \033[0m')
            sys.exit(1)

        print(f'\033[94m[*] Target: {word} \n \033[0m')

        # NOTE: `engines` is a set, so engines run in arbitrary order. Every
        # branch must therefore *extend* the shared accumulators rather than
        # rebind them, otherwise earlier results are lost.
        # NOTE(review): `filter` here is called with a single argument, so it
        # is presumably a project helper shadowing the builtin and returning
        # a list - confirm against the module's imports.
        # NOTE(review): most branches persist the cumulative `all_hosts` /
        # `all_emails` under the current engine's name rather than only that
        # engine's own results - confirm this attribution is intended.
        for engineitem in engines:
            if engineitem == 'baidu':
                print('\033[94m[*] Searching Baidu. \033[0m')
                from theHarvester.discovery import baidusearch
                try:
                    baidu_search = baidusearch.SearchBaidu(word, limit)
                    baidu_search.process()
                    emails = filter(baidu_search.get_emails())
                    all_emails.extend(emails)
                    hosts = filter(baidu_search.get_hostnames())
                    all_hosts.extend(hosts)
                    db = stash.stash_manager()
                    db.store_all(word, all_hosts, 'host', 'baidu')
                    db.store_all(word, all_emails, 'email', 'baidu')
                except Exception as e:
                    print(
                        f'\033[93m[!] An error occurred with Baidu: {e} \033[0m'
                    )

            elif engineitem == 'bing' or engineitem == 'bingapi':
                print('\033[94m[*] Searching Bing. \033[0m')
                from theHarvester.discovery import bingsearch
                try:
                    bing_search = bingsearch.SearchBing(word, limit, start)
                    bingapi = 'yes' if engineitem == 'bingapi' else 'no'
                    bing_search.process(bingapi)
                    emails = filter(bing_search.get_emails())
                    all_emails.extend(emails)
                    hosts = filter(bing_search.get_hostnames())
                    all_hosts.extend(hosts)
                    db = stash.stash_manager()
                    db.store_all(word, all_emails, 'email', 'bing')
                    db.store_all(word, all_hosts, 'host', 'bing')
                except MissingKey as e:
                    print(e)
                except Exception as e:
                    print(
                        f'\033[93m[!] An error occurred with Bing: {e} \033[0m'
                    )

            elif engineitem == 'censys':
                print('\033[94m[*] Searching Censys. \033[0m')
                from theHarvester.discovery import censys
                # Import locally or won't work
                censys_search = censys.SearchCensys(word, limit)
                censys_search.process()
                # Extend (not rebind) so IPs from other engines survive and
                # `all_ip` stays a list for later `.extend()` calls.
                all_ip.extend(censys_search.get_ipaddresses())
                hosts = filter(censys_search.get_hostnames())
                all_hosts.extend(hosts)
                db = stash.stash_manager()
                db.store_all(word, all_hosts, 'host', 'censys')
                db.store_all(word, all_ip, 'ip', 'censys')

            elif engineitem == 'crtsh':
                try:
                    print('\033[94m[*] Searching CRT.sh. \033[0m')
                    from theHarvester.discovery import crtsh
                    crtsh_search = crtsh.SearchCrtsh(word)
                    crtsh_search.process()
                    hosts = filter(crtsh_search.get_data())
                    all_hosts.extend(hosts)
                    db = stash.stash_manager()
                    db.store_all(word, all_hosts, 'host', 'CRTsh')

                except Exception:
                    print(
                        f'\033[93m[!] An timeout occurred with crtsh, cannot find {args.domain}\033[0m'
                    )

            elif engineitem == 'dnsdumpster':
                try:
                    print('\033[94m[*] Searching DNSdumpster. \033[0m')
                    from theHarvester.discovery import dnsdumpster
                    dns_dumpster_search = dnsdumpster.SearchDnsDumpster(word)
                    dns_dumpster_search.process()
                    hosts = filter(dns_dumpster_search.get_hostnames())
                    all_hosts.extend(hosts)
                    db = stash.stash_manager()
                    db.store_all(word, all_hosts, 'host', 'dnsdumpster')
                except Exception as e:
                    print(
                        f'\033[93m[!] An error occurred with dnsdumpster: {e} \033[0m'
                    )

            elif engineitem == 'dogpile':
                try:
                    print('\033[94m[*] Searching Dogpile. \033[0m')
                    from theHarvester.discovery import dogpilesearch
                    dogpile_search = dogpilesearch.SearchDogpile(word, limit)
                    dogpile_search.process()
                    emails = filter(dogpile_search.get_emails())
                    hosts = filter(dogpile_search.get_hostnames())
                    all_hosts.extend(hosts)
                    all_emails.extend(emails)
                    db = stash.stash_manager()
                    # E-mails (not hosts) belong under the 'email' type.
                    db.store_all(word, all_emails, 'email', 'dogpile')
                    db.store_all(word, all_hosts, 'host', 'dogpile')
                except Exception as e:
                    print(
                        f'\033[93m[!] An error occurred with Dogpile: {e} \033[0m'
                    )

            elif engineitem == 'duckduckgo':
                print('\033[94m[*] Searching DuckDuckGo. \033[0m')
                from theHarvester.discovery import duckduckgosearch
                duckduckgo_search = duckduckgosearch.SearchDuckDuckGo(
                    word, limit)
                duckduckgo_search.process()
                emails = filter(duckduckgo_search.get_emails())
                hosts = filter(duckduckgo_search.get_hostnames())
                all_hosts.extend(hosts)
                all_emails.extend(emails)
                db = stash.stash_manager()
                # E-mails (not hosts) belong under the 'email' type.
                db.store_all(word, all_emails, 'email', 'duckduckgo')
                db.store_all(word, all_hosts, 'host', 'duckduckgo')

            elif engineitem == 'github-code':
                print('\033[94m[*] Searching Github (code). \033[0m')
                try:
                    from theHarvester.discovery import githubcode
                    github_search = githubcode.SearchGithubCode(word, limit)
                    github_search.process()
                    emails = filter(github_search.get_emails())
                    all_emails.extend(emails)
                    hosts = filter(github_search.get_hostnames())
                    all_hosts.extend(hosts)
                    db = stash.stash_manager()
                    db.store_all(word, all_hosts, 'host', 'github-code')
                    db.store_all(word, all_emails, 'email', 'github-code')
                except MissingKey as ex:
                    print(ex)

            elif engineitem == 'exalead':
                print('\033[94m[*] Searching Exalead \033[0m')
                from theHarvester.discovery import exaleadsearch
                exalead_search = exaleadsearch.SearchExalead(
                    word, limit, start)
                exalead_search.process()
                emails = filter(exalead_search.get_emails())
                all_emails.extend(emails)
                hosts = filter(exalead_search.get_hostnames())
                all_hosts.extend(hosts)
                db = stash.stash_manager()
                db.store_all(word, all_hosts, 'host', 'exalead')
                db.store_all(word, all_emails, 'email', 'exalead')

            elif engineitem == 'google':
                print('\033[94m[*] Searching Google. \033[0m')
                from theHarvester.discovery import googlesearch
                google_search = googlesearch.SearchGoogle(word, limit, start)
                google_search.process(google_dorking)
                emails = filter(google_search.get_emails())
                all_emails.extend(emails)
                hosts = filter(google_search.get_hostnames())
                all_hosts.extend(hosts)
                db = stash.stash_manager()
                db.store_all(word, all_hosts, 'host', 'google')
                db.store_all(word, all_emails, 'email', 'google')

            elif engineitem == 'hunter':
                print('\033[94m[*] Searching Hunter. \033[0m')
                from theHarvester.discovery import huntersearch
                # Import locally or won't work.
                try:
                    hunter_search = huntersearch.SearchHunter(
                        word, limit, start)
                    hunter_search.process()
                    emails = filter(hunter_search.get_emails())
                    all_emails.extend(emails)
                    hosts = filter(hunter_search.get_hostnames())
                    all_hosts.extend(hosts)
                    db = stash.stash_manager()
                    db.store_all(word, all_hosts, 'host', 'hunter')
                    db.store_all(word, all_emails, 'email', 'hunter')
                except MissingKey as e:
                    print(e)
                except Exception as e:
                    print(
                        f'\033[93m[!] An error occurred with Hunter: {e} \033[0m'
                    )

            elif engineitem == 'intelx':
                print('\033[94m[*] Searching Intelx. \033[0m')
                from theHarvester.discovery import intelxsearch
                # Import locally or won't work.
                try:
                    intelx_search = intelxsearch.SearchIntelx(word, limit)
                    intelx_search.process()
                    emails = filter(intelx_search.get_emails())
                    all_emails.extend(emails)
                    hosts = filter(intelx_search.get_hostnames())
                    all_hosts.extend(hosts)
                    db = stash.stash_manager()
                    db.store_all(word, all_hosts, 'host', 'intelx')
                    db.store_all(word, all_emails, 'email', 'intelx')
                except Exception as e:
                    print(e)

            elif engineitem == 'linkedin':
                print('\033[94m[*] Searching Linkedin. \033[0m')
                from theHarvester.discovery import linkedinsearch
                linkedin_search = linkedinsearch.SearchLinkedin(word, limit)
                linkedin_search.process()
                people = linkedin_search.get_people()
                db = stash.stash_manager()
                db.store_all(word, people, 'name', 'linkedin')

                if len(people) == 0:
                    print('\n[*] No users found Linkedin.\n\n')
                else:
                    print(f'\n[*] Users found: {len(people)}')
                    print('---------------------')
                    for user in sorted(set(people)):
                        print(user)

            elif engineitem == 'linkedin_links':
                print('\033[94m[*] Searching Linkedin. \033[0m')
                from theHarvester.discovery import linkedinsearch
                linkedin_links_search = linkedinsearch.SearchLinkedin(
                    word, limit)
                linkedin_links_search.process()
                people = linkedin_links_search.get_links()
                db = stash.stash_manager()
                db.store_all(word, people, 'name', 'linkedin')

                if len(people) == 0:
                    print('\n[*] No links found Linkedin.\n\n')
                else:
                    print(f'\n[*] Links found: {len(people)}')
                    print('---------------------')
                    for user in sorted(set(people)):
                        print(user)

            elif engineitem == 'netcraft':
                print('\033[94m[*] Searching Netcraft. \033[0m')
                from theHarvester.discovery import netcraft
                netcraft_search = netcraft.SearchNetcraft(word)
                netcraft_search.process()
                hosts = filter(netcraft_search.get_hostnames())
                all_hosts.extend(hosts)
                db = stash.stash_manager()
                db.store_all(word, all_hosts, 'host', 'netcraft')

            elif engineitem == 'securityTrails':
                print('\033[94m[*] Searching SecurityTrails. \033[0m')
                from theHarvester.discovery import securitytrailssearch
                try:
                    securitytrails_search = securitytrailssearch.SearchSecuritytrail(
                        word)
                    securitytrails_search.process()
                    hosts = filter(securitytrails_search.get_hostnames())
                    all_hosts.extend(hosts)
                    db = stash.stash_manager()
                    db.store_all(word, hosts, 'host', 'securityTrails')
                    ips = securitytrails_search.get_ips()
                    all_ip.extend(ips)
                    db = stash.stash_manager()
                    db.store_all(word, ips, 'ip', 'securityTrails')
                except MissingKey as e:
                    print(e)
                except Exception as e:
                    print(
                        f'\033[93m[!] An error occurred with SecurityTrails: {e} \033[0m'
                    )

            elif engineitem == 'threatcrowd':
                print('\033[94m[*] Searching Threatcrowd. \033[0m')
                from theHarvester.discovery import threatcrowd
                try:
                    threatcrowd_search = threatcrowd.SearchThreatcrowd(word)
                    threatcrowd_search.process()
                    hosts = filter(threatcrowd_search.get_hostnames())
                    all_hosts.extend(hosts)
                    db = stash.stash_manager()
                    db.store_all(word, all_hosts, 'host', 'threatcrowd')
                except Exception as e:
                    print(e)

            elif engineitem == 'trello':
                print('\033[94m[*] Searching Trello. \033[0m')
                from theHarvester.discovery import trello
                # Import locally or won't work.
                trello_search = trello.SearchTrello(word, limit)
                trello_search.process()
                emails = filter(trello_search.get_emails())
                all_emails.extend(emails)
                info = trello_search.get_urls()
                hosts = filter(info[0])
                # Second tuple item flags that Trello ran, so the URL summary
                # is printed in the results section below.
                trello_info = (info[1], True)
                all_hosts.extend(hosts)
                db = stash.stash_manager()
                db.store_all(word, hosts, 'host', 'trello')
                db.store_all(word, emails, 'email', 'trello')

            elif engineitem == 'twitter':
                print(
                    '\033[94m[*] Searching Twitter usernames using Google. \033[0m'
                )
                from theHarvester.discovery import twittersearch
                twitter_search = twittersearch.SearchTwitter(word, limit)
                twitter_search.process()
                people = twitter_search.get_people()
                db = stash.stash_manager()
                db.store_all(word, people, 'name', 'twitter')

                if len(people) == 0:
                    print('\n[*] No users found.\n\n')
                else:
                    print('\n[*] Users found: ' + str(len(people)))
                    print('---------------------')
                    for user in sorted(set(people)):
                        print(user)

            elif engineitem == 'virustotal':
                print('\033[94m[*] Searching VirusTotal. \033[0m')
                from theHarvester.discovery import virustotal
                virustotal_search = virustotal.SearchVirustotal(word)
                virustotal_search.process()
                hosts = filter(virustotal_search.get_hostnames())
                all_hosts.extend(hosts)
                db = stash.stash_manager()
                db.store_all(word, all_hosts, 'host', 'virustotal')

            elif engineitem == 'yahoo':
                print('\033[94m[*] Searching Yahoo. \033[0m')
                from theHarvester.discovery import yahoosearch
                yahoo_search = yahoosearch.SearchYahoo(word, limit)
                yahoo_search.process()
                hosts = yahoo_search.get_hostnames()
                emails = yahoo_search.get_emails()
                all_hosts.extend(filter(hosts))
                all_emails.extend(filter(emails))
                db = stash.stash_manager()
                db.store_all(word, all_hosts, 'host', 'yahoo')
                db.store_all(word, all_emails, 'email', 'yahoo')

    # Results
    if len(all_ip) == 0:
        print('\n[*] No IPs found.')
    else:
        print('\n[*] IPs found: ' + str(len(all_ip)))
        print('-------------------')
        ips = sorted(
            ipaddress.ip_address(line.strip()) for line in set(all_ip))
        print('\n'.join(map(str, ips)))

    if len(all_emails) == 0:
        print('\n[*] No emails found.')
    else:
        print('\n[*] Emails found: ' + str(len(all_emails)))
        print('----------------------')
        print('\n'.join(sorted(set(all_emails))))

    if len(all_hosts) == 0:
        print('\n[*] No hosts found.\n\n')
    else:
        print('\n[*] Hosts found: ' + str(len(all_hosts)))
        print('---------------------')
        all_hosts = sorted(set(all_hosts))
        full = hostchecker.Checker(all_hosts).check()
        for host in full:
            # Entries are split on ':'; the second field is treated as the
            # resolved IP, with the literal 'empty' meaning "no address".
            ip = host.split(':')[1]
            print(host)
            if ip != 'empty' and ip.lower() not in host_ip:
                host_ip.append(ip.lower())

        db = stash.stash_manager()
        db.store_all(word, host_ip, 'ip', 'DNS-resolver')

    if trello_info[1] is True:
        trello_urls = trello_info[0]
        # Truthiness test: `x is []` compares identity with a fresh list and
        # is therefore never true.
        if not trello_urls:
            print('\n[*] No URLs found.')
        else:
            total = len(trello_urls)
            print('\n[*] URLs found: ' + str(total))
            print('--------------------')
            for url in sorted(set(trello_urls)):
                print(url)

    # DNS brute force
    if dnsbrute is True:
        print('\n[*] Starting DNS brute force.')
        a = dnssearch.DnsForce(word, dnsserver, verbose=True)
        # NOTE(review): the value returned by process() is neither printed,
        # merged into `full`, nor stored - confirm whether that is intended.
        a.process()

    # Port scanning
    if ports_scanning is True:
        print('\n\n[*] Scanning ports (active).\n')
        for x in full:
            host = x.split(':')[1]
            domain = x.split(':')[0]
            if host != 'empty':
                print(('[*] Scanning ' + host))
                ports = [21, 22, 80, 443, 8080]
                try:
                    scan = port_scanner.PortScan(host, ports)
                    openports = scan.process()
                    # Any open port (including exactly one) is reported and
                    # triggers the takeover check.
                    if len(openports) > 0:
                        print(('\t[*] Detected open ports: ' +
                               ','.join(str(e) for e in openports)))
                        search_take = takeover.TakeOver(domain)
                        search_take.process()
                except Exception as e:
                    print(e)

    # DNS reverse lookup
    dnsrev = []
    if dnslookup is True:
        print('\n[*] Starting active queries.')
        analyzed_ranges = []
        for entry in host_ip:
            print(entry)
            ip = entry.split(':')[0]
            # Replace the last octet to obtain the enclosing /24 range.
            ip_range = ip.split('.')
            ip_range[3] = '0/24'
            ip_range = '.'.join(ip_range)
            if ip_range in analyzed_ranges:
                continue
            print('[*] Performing reverse lookup in ' + ip_range)
            a = dnssearch.DnsReverse(ip_range, True)
            a.list()
            res = a.process()
            analyzed_ranges.append(ip_range)
            for entries in res:
                # Keep only results that mention the target domain.
                if word in entries:
                    dnsrev.append(entries)
                    if entries not in full:
                        full.append(entries)
        print('[*] Hosts found after reverse lookup (in target domain):')
        print('--------------------------------------------------------')
        for xh in dnsrev:
            print(xh)

    # DNS TLD expansion
    if dnstld:
        print('[*] Starting DNS TLD expansion.')
        a = dnssearch.DnsTld(word, dnsserver, verbose=True)
        res = a.process()
        print('\n[*] Hosts found after DNS TLD expansion:')
        print('----------------------------------------')
        for y in res:
            print(y)
            if y not in full:
                full.append(y)

    # Virtual hosts search
    if virtual == 'basic':
        # The function-scope import in the bing branch above makes
        # `bingsearch` a local name for the whole function, so it must be
        # imported here too or this section fails with UnboundLocalError
        # whenever the bing engine was not selected.
        from theHarvester.discovery import bingsearch
        print('\n[*] Virtual hosts:')
        print('------------------')
        for ip in host_ip:
            basic_search = bingsearch.SearchBing(ip, limit, start)
            basic_search.process_vhost()
            results = basic_search.get_allhostnames()
            for result in results:
                # Strip leftover markup from the scraped host names.
                result = re.sub(r'[[\<\/?]*[\w]*>]*', '', result)
                result = re.sub('<', '', result)
                result = re.sub('>', '', result)
                print((ip + '\t' + result))
                vhost.append(ip + ':' + result)
                full.append(ip + ':' + result)
        vhost = sorted(set(vhost))

    # Shodan
    # NOTE(review): `shodanres` is never populated in this function, so the
    # <shodan> section of the XML report below is currently never written.
    shodanres = []
    if shodan is True:
        import texttable
        tab = texttable.Texttable()
        header = [
            'IP address', 'Hostname', 'Org', 'Services:Ports', 'Technologies'
        ]
        tab.header(header)
        tab.set_cols_align(['c', 'c', 'c', 'c', 'c'])
        tab.set_cols_valign(['m', 'm', 'm', 'm', 'm'])
        tab.set_chars(['-', '|', '+', '#'])
        tab.set_cols_width([15, 20, 15, 15, 18])
        host_ip = list(set(host_ip))
        print('\033[94m[*] Searching Shodan. \033[0m')
        try:
            for ip in host_ip:
                print(('\tSearching for ' + ip))
                # Distinct name so the boolean `shodan` flag is not clobbered.
                shodan_search = shodansearch.SearchShodan()
                rowdata = shodan_search.search_ip(ip)
                time.sleep(2)
                tab.add_row(rowdata)
            printedtable = tab.draw()
            print(printedtable)
        except Exception as e:
            print(f'\033[93m[!] An error occurred with Shodan: {e} \033[0m')

    # Here we need to add explosion mode.
    # We have to take out the TLDs to do this.
    recursion = None  # Feature switch: permanently off for now.
    if recursion:
        # Same local-name caveat as for bingsearch above.
        from theHarvester.discovery import googlesearch
        counter = 0
        # Dedicated loop variable so `word` (used by the report) is preserved.
        for vhost_entry in vhost:
            search = googlesearch.SearchGoogle(vhost_entry, limit, counter)
            search.process(google_dorking)
            emails = search.get_emails()
            hosts = search.get_hostnames()
            print(emails)
            print(hosts)

    # Reporting
    if filename != "":
        try:
            print('\n[*] Reporting started.')
            db = stash.stash_manager()
            scanboarddata = db.getscanboarddata()
            latestscanresults = db.getlatestscanresults(word)
            previousscanresults = db.getlatestscanresults(word,
                                                          previousday=True)
            latestscanchartdata = db.latestscanchartdata(word)
            scanhistorydomain = db.getscanhistorydomain(word)
            pluginscanstatistics = db.getpluginscanstatistics()
            generator = statichtmlgenerator.HtmlGenerator(word)
            HTMLcode = generator.beginhtml()
            HTMLcode += generator.generatelatestscanresults(latestscanresults)
            HTMLcode += generator.generatepreviousscanresults(
                previousscanresults)
            graph = reportgraph.GraphGenerator(word)
            HTMLcode += graph.drawlatestscangraph(word, latestscanchartdata)
            HTMLcode += graph.drawscattergraphscanhistory(
                word, scanhistorydomain)
            HTMLcode += generator.generatepluginscanstatistics(
                pluginscanstatistics)
            HTMLcode += generator.generatedashboardcode(scanboarddata)
            HTMLcode += '<p><span style="color: #000000;">Report generated on ' + str(
                datetime.datetime.now()) + '</span></p>'
            HTMLcode += '''
            </body>
            </html>
            '''
            # Context manager guarantees the handle is closed on error too.
            with open(filename, 'w') as html_file:
                html_file.write(HTMLcode)
            print('[*] Reporting finished.')
            print('[*] Saving files.')
        except Exception as e:
            print(e)
            print(
                '\n\033[93m[!] An error occurred while creating the output file.\n\n \033[0m'
            )
            sys.exit(1)

        def xml_entry(tag, entry):
            """Render one 'hostname:ip' (or free-form) entry as an XML element."""
            parts = entry.split(':')
            if len(parts) == 2:
                return ('<' + tag + '><ip>' + escape(parts[1]) +
                        '</ip><hostname>' + escape(parts[0]) + '</hostname></' +
                        tag + '>')
            # Entries without exactly one ':' are emitted verbatim (escaped)
            # using the original string, not the split list.
            return '<' + tag + '>' + escape(entry) + '</' + tag + '>'

        try:
            # splitext only strips the final extension, so dots in directory
            # names (e.g. './out/report.html') no longer truncate the path.
            filename = os.path.splitext(filename)[0] + '.xml'
            with open(filename, 'w') as xml_file:
                xml_file.write(
                    '<?xml version="1.0" encoding="UTF-8"?><theHarvester>')
                # Harvested values come from third-party sources, so they are
                # escaped before being embedded in the XML document.
                for x in all_emails:
                    xml_file.write('<email>' + escape(x) + '</email>')
                for x in full:
                    xml_file.write(xml_entry('host', x))
                for x in vhost:
                    xml_file.write(xml_entry('vhost', x))
                if shodanres != []:
                    shodanalysis = []
                    reg_server = re.compile('Server:.*')
                    for x in shodanres:
                        res = x.split('SAPO')
                        xml_file.write('<shodan>')
                        xml_file.write('<host>' + escape(res[0]) + '</host>')
                        xml_file.write('<port>' + escape(res[2]) + '</port>')
                        xml_file.write('<banner><!--' + res[1] +
                                       '--></banner>')
                        temp = reg_server.findall(res[1])
                        if temp != []:
                            shodanalysis.append(res[0] + ':' + temp[0])
                        xml_file.write('</shodan>')
                    if shodanalysis != []:
                        shodanalysis = sorted(set(shodanalysis))
                        xml_file.write('<servers>')
                        for x in shodanalysis:
                            xml_file.write('<server>' + escape(x) +
                                           '</server>')
                        xml_file.write('</servers>')

                xml_file.write('</theHarvester>')
            print('[*] Files saved.')
        except Exception as er:
            print(
                f'\033[93m[!] An error occurred while saving the XML file: {er} \033[0m'
            )
        print('\n\n')
        sys.exit(0)

Example #2

Show file

async def start():
    parser = argparse.ArgumentParser(
        description=
        'theHarvester is used to gather open source intelligence (OSINT) on a company or domain.'
    )
    parser.add_argument('-d',
                        '--domain',
                        help='Company name or domain to search.',
                        required=True)
    parser.add_argument(
        '-l',
        '--limit',
        help='Limit the number of search results, default=500.',
        default=500,
        type=int)
    parser.add_argument('-S',
                        '--start',
                        help='Start with result number X, default=0.',
                        default=0,
                        type=int)
    parser.add_argument('-g',
                        '--google-dork',
                        help='Use Google Dorks for Google search.',
                        default=False,
                        action='store_true')
    parser.add_argument(
        '-p',
        '--proxies',
        help='Use proxies for requests, enter proxies in proxies.yaml.',
        default=False,
        action='store_true')
    parser.add_argument('-s',
                        '--shodan',
                        help='Use Shodan to query discovered hosts.',
                        default=False,
                        action='store_true')
    parser.add_argument(
        '--screenshot',
        help=
        'Take screenshots of resolved domains specify output directory: --screenshot output_directory',
        default="",
        type=str)
    parser.add_argument(
        '-v',
        '--virtual-host',
        help=
        'Verify host name via DNS resolution and search for virtual hosts.',
        action='store_const',
        const='basic',
        default=False)
    parser.add_argument('-e',
                        '--dns-server',
                        help='DNS server to use for lookup.')
    parser.add_argument(
        '-t',
        '--dns-tld',
        help='Perform a DNS TLD expansion discovery, default False.',
        default=False)
    parser.add_argument('-r',
                        '--take-over',
                        help='Check for takeovers.',
                        default=False,
                        action='store_true')
    parser.add_argument('-n',
                        '--dns-lookup',
                        help='Enable DNS server lookup, default False.',
                        default=False,
                        action='store_true')
    parser.add_argument('-c',
                        '--dns-brute',
                        help='Perform a DNS brute force on the domain.',
                        default=False,
                        action='store_true')
    parser.add_argument('-f',
                        '--filename',
                        help='Save the results to an HTML and/or XML file.',
                        default='',
                        type=str)
    parser.add_argument(
        '-b',
        '--source',
        help='''baidu, bing, bingapi, bufferoverun, censys, certspotter, crtsh,
                            dnsdumpster, duckduckgo, exalead, github-code, google,
                            hackertarget, hunter, intelx, linkedin, linkedin_links,
                            netcraft, omnisint, otx, pentesttools, projectdiscovery,
                            qwant, rapiddns, securityTrails, spyse, sublist3r, threatcrowd, threatminer,
                            trello, twitter, urlscan, virustotal, yahoo''')

    args = parser.parse_args()
    filename: str = args.filename
    dnsbrute = (args.dns_brute, False)
    try:
        db = stash.StashManager()
        await db.do_init()
    except Exception:
        pass

    all_emails: list = []
    all_hosts: list = []
    all_ip: list = []
    dnslookup = args.dns_lookup
    dnsserver = args.dns_server
    dnstld = args.dns_tld
    engines = []
    # If the user specifies

    full: list = []
    ips: list = []
    google_dorking = args.google_dork
    host_ip: list = []
    limit: int = args.limit
    shodan = args.shodan
    start: int = args.start
    all_urls: list = []
    vhost: list = []
    virtual = args.virtual_host
    word: str = args.domain
    takeover_status = args.take_over
    use_proxy = args.proxies

    async def store(search_engine: Any,
                    source: str,
                    process_param: Any = None,
                    store_host: bool = False,
                    store_emails: bool = False,
                    store_ip: bool = False,
                    store_people: bool = False,
                    store_links: bool = False,
                    store_results: bool = False) -> None:
        """
        Run one search engine and persist the requested kinds of results.

        The engine is processed first; each ``store_*`` flag then selects one
        kind of result to pull from the engine, merge into the shared result
        lists of the enclosing function and write to the database under ``source``.

        :param search_engine: search engine to fetch details from
        :param source: source against which the details (corresponding to the search engine) need to be persisted
        :param process_param: any parameters to be passed to the search engine eg: Google needs google_dorking
        :param store_host: whether to store hosts
        :param store_emails: whether to store emails
        :param store_ip: whether to store IP address
        :param store_people: whether to store user details
        :param store_links: whether to store links
        :param store_results: whether to fetch details from get_results() and persist
        """
        if process_param is None:
            await search_engine.process(use_proxy)
        else:
            await search_engine.process(process_param, use_proxy)

        # Every write below goes through this local manager. The enclosing
        # function's `db` may be unbound when its initialisation failed, so it
        # must not be relied upon here.
        db_stash = stash.StashManager()

        if source == 'suip':
            print(
                f'\033[94m[*] Searching {source[0].upper() + source[1:]} this module can take 10+ min but is worth '
                f'it. \033[0m')
        else:
            print(
                f'\033[94m[*] Searching {source[0].upper() + source[1:]}. \033[0m'
            )

        if store_host:
            raw_hosts = await search_engine.get_hostnames()
            # Keep only names that contain '.<target domain>'.
            host_names = [
                host for host in filter(raw_hosts) if f'.{word}' in host
            ]
            if source not in ('hackertarget', 'pentesttools', 'rapiddns'):
                # Hosts from every other source are passed through the host
                # checker, which yields the resolved hosts and their IPs.
                full_hosts_checker = hostchecker.Checker(host_names)
                temp_hosts, temp_ips = await full_hosts_checker.check()
                ips.extend(temp_ips)
                full.extend(temp_hosts)
            else:
                full.extend(host_names)
            all_hosts.extend(host_names)
            # NOTE(review): this persists the accumulated all_hosts list, not
            # only this source's host_names - confirm that is intended.
            await db_stash.store_all(word, all_hosts, 'host', source)

        if store_emails:
            email_list = filter(await search_engine.get_emails())
            all_emails.extend(email_list)
            await db_stash.store_all(word, email_list, 'email', source)

        if store_ip:
            ips_list = await search_engine.get_ips()
            all_ip.extend(ips_list)
            # NOTE(review): accumulated all_ip is persisted, as with hosts above.
            await db_stash.store_all(word, all_ip, 'ip', source)

        if store_results:
            email_list, host_names, urls = await search_engine.get_results()
            all_emails.extend(email_list)
            host_names = [
                host for host in filter(host_names) if f'.{word}' in host
            ]
            all_urls.extend(filter(urls))
            all_hosts.extend(host_names)
            await db_stash.store_all(word, all_hosts, 'host', source)
            await db_stash.store_all(word, all_emails, 'email', source)

        if store_people:
            people_list = await search_engine.get_people()
            await db_stash.store_all(word, people_list, 'people', source)
            if not people_list:
                print('\n[*] No users found.\n\n')
            else:
                print('\n[*] Users found: ' + str(len(people_list)))
                print('---------------------')
                for usr in sorted(set(people_list)):
                    print(usr)

        if store_links:
            links = await search_engine.get_links()
            # Label the rows with this call's own `source`. The enclosing
            # loop variable is late-bound: by the time this coroutine runs it
            # holds whichever engine the loop visited last.
            await db_stash.store_all(word, links, 'name', source)
            if not links:
                print('\n[*] No links found.\n\n')
            else:
                print(f'\n[*] Links found: {len(links)}')
                print('---------------------')
                for link in sorted(set(links)):
                    print(link)

    stor_lst = []
    if args.source is not None:
        if args.source.lower() != 'all':
            engines = sorted(set(map(str.strip, args.source.split(','))))
        else:
            engines = Core.get_supportedengines()
        # Iterate through search engines in order
        if set(engines).issubset(Core.get_supportedengines()):
            print(f'\033[94m[*] Target: {word} \n \033[0m')

            for engineitem in engines:
                if engineitem == 'baidu':
                    from theHarvester.discovery import baidusearch
                    try:
                        baidu_search = baidusearch.SearchBaidu(word, limit)
                        stor_lst.append(
                            store(baidu_search,
                                  engineitem,
                                  store_host=True,
                                  store_emails=True))
                    except Exception:
                        pass

                elif engineitem == 'bing' or engineitem == 'bingapi':
                    from theHarvester.discovery import bingsearch
                    try:
                        bing_search = bingsearch.SearchBing(word, limit, start)
                        bingapi = ''
                        if engineitem == 'bingapi':
                            bingapi += 'yes'
                        else:
                            bingapi += 'no'
                        stor_lst.append(
                            store(bing_search,
                                  'bing',
                                  process_param=bingapi,
                                  store_host=True,
                                  store_emails=True))
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)
                        else:
                            print(e)

                elif engineitem == 'bufferoverun':
                    from theHarvester.discovery import bufferoverun
                    try:
                        bufferoverun_search = bufferoverun.SearchBufferover(
                            word)
                        stor_lst.append(
                            store(bufferoverun_search,
                                  engineitem,
                                  store_host=True,
                                  store_ip=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'censys':
                    from theHarvester.discovery import censys
                    try:
                        censys_search = censys.SearchCensys(word)
                        stor_lst.append(
                            store(censys_search, engineitem, store_host=True))
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)

                elif engineitem == 'certspotter':
                    from theHarvester.discovery import certspottersearch
                    try:
                        certspotter_search = certspottersearch.SearchCertspoter(
                            word)
                        stor_lst.append(
                            store(certspotter_search,
                                  engineitem,
                                  None,
                                  store_host=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'crtsh':
                    try:
                        from theHarvester.discovery import crtsh
                        crtsh_search = crtsh.SearchCrtsh(word)
                        stor_lst.append(
                            store(crtsh_search, 'CRTsh', store_host=True))
                    except Exception as e:
                        print(
                            f'\033[93m[!] A timeout occurred with crtsh, cannot find {args.domain}\n {e}\033[0m'
                        )

                elif engineitem == 'dnsdumpster':
                    try:
                        from theHarvester.discovery import dnsdumpster
                        dns_dumpster_search = dnsdumpster.SearchDnsDumpster(
                            word)
                        stor_lst.append(
                            store(dns_dumpster_search,
                                  engineitem,
                                  store_host=True))
                    except Exception as e:
                        print(
                            f'\033[93m[!] An error occurred with dnsdumpster: {e} \033[0m'
                        )

                elif engineitem == 'duckduckgo':
                    from theHarvester.discovery import duckduckgosearch
                    duckduckgo_search = duckduckgosearch.SearchDuckDuckGo(
                        word, limit)
                    stor_lst.append(
                        store(duckduckgo_search,
                              engineitem,
                              store_host=True,
                              store_emails=True))

                elif engineitem == 'exalead':
                    from theHarvester.discovery import exaleadsearch
                    exalead_search = exaleadsearch.SearchExalead(
                        word, limit, start)
                    stor_lst.append(
                        store(exalead_search,
                              engineitem,
                              store_host=True,
                              store_emails=True))

                elif engineitem == 'github-code':
                    try:
                        from theHarvester.discovery import githubcode
                        github_search = githubcode.SearchGithubCode(
                            word, limit)
                        stor_lst.append(
                            store(github_search,
                                  engineitem,
                                  store_host=True,
                                  store_emails=True))
                    except MissingKey as ex:
                        print(ex)
                    else:
                        pass

                elif engineitem == 'google':
                    from theHarvester.discovery import googlesearch
                    google_search = googlesearch.SearchGoogle(
                        word, limit, start)
                    stor_lst.append(
                        store(google_search,
                              engineitem,
                              process_param=google_dorking,
                              store_host=True,
                              store_emails=True))

                elif engineitem == 'hackertarget':
                    from theHarvester.discovery import hackertarget
                    hackertarget_search = hackertarget.SearchHackerTarget(word)
                    stor_lst.append(
                        store(hackertarget_search, engineitem,
                              store_host=True))

                elif engineitem == 'hunter':
                    from theHarvester.discovery import huntersearch
                    # Import locally or won't work.
                    try:
                        hunter_search = huntersearch.SearchHunter(
                            word, limit, start)
                        stor_lst.append(
                            store(hunter_search,
                                  engineitem,
                                  store_host=True,
                                  store_emails=True))
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)
                        else:
                            pass

                elif engineitem == 'intelx':
                    from theHarvester.discovery import intelxsearch
                    # Import locally or won't work.
                    try:
                        intelx_search = intelxsearch.SearchIntelx(word, limit)
                        stor_lst.append(
                            store(intelx_search,
                                  engineitem,
                                  store_host=True,
                                  store_emails=True))
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)
                        else:
                            print(
                                f'An exception has occurred in Intelx search: {e}'
                            )

                elif engineitem == 'linkedin':
                    from theHarvester.discovery import linkedinsearch
                    linkedin_search = linkedinsearch.SearchLinkedin(
                        word, limit)
                    stor_lst.append(
                        store(linkedin_search, engineitem, store_people=True))

                elif engineitem == 'linkedin_links':
                    from theHarvester.discovery import linkedinsearch
                    linkedin_links_search = linkedinsearch.SearchLinkedin(
                        word, limit)
                    stor_lst.append(
                        store(linkedin_links_search,
                              'linkedin',
                              store_links=True))

                elif engineitem == 'netcraft':
                    from theHarvester.discovery import netcraft
                    netcraft_search = netcraft.SearchNetcraft(word)
                    stor_lst.append(
                        store(netcraft_search, engineitem, store_host=True))

                elif engineitem == 'omnisint':
                    from theHarvester.discovery import omnisint
                    try:
                        omnisint_search = omnisint.SearchOmnisint(word, limit)
                        stor_lst.append(
                            store(omnisint_search,
                                  engineitem,
                                  store_host=True,
                                  store_ip=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'otx':
                    from theHarvester.discovery import otxsearch
                    try:
                        otxsearch_search = otxsearch.SearchOtx(word)
                        stor_lst.append(
                            store(otxsearch_search,
                                  engineitem,
                                  store_host=True,
                                  store_ip=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'pentesttools':
                    from theHarvester.discovery import pentesttools
                    try:
                        pentesttools_search = pentesttools.SearchPentestTools(
                            word)
                        stor_lst.append(
                            store(pentesttools_search,
                                  engineitem,
                                  store_host=True))
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)
                        else:
                            print(
                                f'An exception has occurred in PentestTools search: {e}'
                            )

                elif engineitem == 'projectdiscovery':
                    from theHarvester.discovery import projectdiscovery
                    try:
                        projectdiscovery_search = projectdiscovery.SearchDiscovery(
                            word)
                        stor_lst.append(
                            store(projectdiscovery_search,
                                  engineitem,
                                  store_host=True))
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)
                        else:
                            print(
                                'An exception has occurred in ProjectDiscovery'
                            )

                elif engineitem == 'qwant':
                    from theHarvester.discovery import qwantsearch
                    qwant_search = qwantsearch.SearchQwant(word, start, limit)
                    stor_lst.append(
                        store(qwant_search,
                              engineitem,
                              store_host=True,
                              store_emails=True))

                elif engineitem == 'rapiddns':
                    from theHarvester.discovery import rapiddns
                    try:
                        rapiddns_search = rapiddns.SearchRapidDns(word)
                        stor_lst.append(
                            store(rapiddns_search, engineitem,
                                  store_host=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'securityTrails':
                    from theHarvester.discovery import securitytrailssearch
                    try:
                        securitytrails_search = securitytrailssearch.SearchSecuritytrail(
                            word)
                        stor_lst.append(
                            store(securitytrails_search,
                                  engineitem,
                                  store_host=True,
                                  store_ip=True))
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)
                        else:
                            pass

                elif engineitem == 'sublist3r':
                    from theHarvester.discovery import sublist3r
                    try:
                        sublist3r_search = sublist3r.SearchSublist3r(word)
                        stor_lst.append(
                            store(sublist3r_search,
                                  engineitem,
                                  store_host=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'spyse':
                    from theHarvester.discovery import spyse
                    try:
                        spyse_search = spyse.SearchSpyse(word)
                        stor_lst.append(
                            store(spyse_search,
                                  engineitem,
                                  store_host=True,
                                  store_ip=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'threatcrowd':
                    from theHarvester.discovery import threatcrowd
                    try:
                        threatcrowd_search = threatcrowd.SearchThreatcrowd(
                            word)
                        stor_lst.append(
                            store(threatcrowd_search,
                                  engineitem,
                                  store_host=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'threatminer':
                    from theHarvester.discovery import threatminer
                    try:
                        threatminer_search = threatminer.SearchThreatminer(
                            word)
                        stor_lst.append(
                            store(threatminer_search,
                                  engineitem,
                                  store_host=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'trello':
                    from theHarvester.discovery import trello
                    # Import locally or won't work.
                    trello_search = trello.SearchTrello(word)
                    stor_lst.append(
                        store(trello_search, engineitem, store_results=True))

                elif engineitem == 'twitter':
                    from theHarvester.discovery import twittersearch
                    twitter_search = twittersearch.SearchTwitter(word, limit)
                    stor_lst.append(
                        store(twitter_search, engineitem, store_people=True))

                elif engineitem == 'urlscan':
                    from theHarvester.discovery import urlscan
                    try:
                        urlscan_search = urlscan.SearchUrlscan(word)
                        stor_lst.append(
                            store(urlscan_search,
                                  engineitem,
                                  store_host=True,
                                  store_ip=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'virustotal':
                    from theHarvester.discovery import virustotal
                    virustotal_search = virustotal.SearchVirustotal(word)
                    stor_lst.append(
                        store(virustotal_search, engineitem, store_host=True))

                elif engineitem == 'yahoo':

                    from theHarvester.discovery import yahoosearch
                    yahoo_search = yahoosearch.SearchYahoo(word, limit)
                    stor_lst.append(
                        store(yahoo_search,
                              engineitem,
                              store_host=True,
                              store_emails=True))
        else:
            print('\033[93m[!] Invalid source.\n\n \033[0m')
            sys.exit(1)

    async def worker(queue):
        """
        Consume awaitables from ``queue`` forever, marking each one as done.

        A failing work item does not stop the worker: the error is printed and
        the item is still acknowledged, so ``queue.join()`` cannot hang on it.
        The loop only ends when the task running it is cancelled.

        :param queue: asyncio queue whose items are awaitables to run
        """
        while True:
            # Get a "work item" out of the queue.
            stor = await queue.get()
            try:
                await stor
            except Exception as error:
                # Report instead of silently dropping the failure, so that a
                # broken search module is visible to the user.
                print(f'\033[93m[!] A search task failed: {error!r} \033[0m')
            finally:
                # Notify the queue that the "work item" has been processed.
                queue.task_done()

    async def handler(lst):
        """
        Run every awaitable in ``lst`` through a pool of five worker tasks.

        The awaitables are queued up front, five ``worker`` tasks drain the
        queue concurrently, and once the queue reports that everything has
        been processed the workers are cancelled and reaped.

        :param lst: iterable of awaitables to execute
        """
        queue = asyncio.Queue()
        for pending in lst:
            # Load all work before any worker starts pulling from the queue.
            queue.put_nowait(pending)

        # Five workers share the same queue.
        workers = [asyncio.create_task(worker(queue)) for _ in range(5)]

        # Block until every queued item has been marked as done.
        await queue.join()

        # The workers loop forever, so they have to be cancelled explicitly;
        # gather() with return_exceptions collects their CancelledError.
        for running in workers:
            running.cancel()
        await asyncio.gather(*workers, return_exceptions=True)

    await handler(lst=stor_lst)
    # Sanity check to see if all_emails and all_hosts are defined.
    try:
        all_emails
    except NameError:
        print(
            '\n\n\033[93m[!] No emails found because all_emails is not defined.\n\n \033[0m'
        )
        sys.exit(1)
    try:
        all_hosts
    except NameError:
        print(
            '\n\n\033[93m[!] No hosts found because all_hosts is not defined.\n\n \033[0m'
        )
        sys.exit(1)

    # Results
    if len(all_ip) == 0:
        print('\n[*] No IPs found.')
    else:
        print('\n[*] IPs found: ' + str(len(all_ip)))
        print('-------------------')
        # use netaddr as the list may contain ipv4 and ipv6 addresses
        ip_list = sorted([netaddr.IPAddress(ip.strip()) for ip in set(all_ip)])
        print('\n'.join(map(str, ip_list)))

    if len(all_emails) == 0:
        print('\n[*] No emails found.')
    else:
        print('\n[*] Emails found: ' + str(len(all_emails)))
        print('----------------------')
        print(('\n'.join(sorted(list(set(all_emails))))))

    if len(all_hosts) == 0:
        print('\n[*] No hosts found.\n\n')
    else:
        print('\n[*] Hosts found: ' + str(len(all_hosts)))
        print('---------------------')
        all_hosts = sorted(list(set(all_hosts)))
        db = stash.StashManager()
        full = [
            host if ':' in host and word in host else
            word in host.split(':')[0] and host for host in full
        ]
        full = list({host for host in full if host})
        full.sort(key=lambda el: el.split(':')[0])
        for host in full:
            print(host)
        host_ip = [
            netaddr_ip.format()
            for netaddr_ip in sorted([netaddr.IPAddress(ip) for ip in ips])
        ]
        await db.store_all(word, host_ip, 'ip', 'DNS-resolver')
    length_urls = len(all_urls)
    if length_urls == 0:
        if len(engines) >= 1 and 'trello' in engines:
            print('\n[*] No Trello URLs found.')
    else:
        total = length_urls
        print('\n[*] Trello URLs found: ' + str(total))
        print('--------------------')
        for url in sorted(all_urls):
            print(url)

    # DNS brute force
    if dnsbrute and dnsbrute[0] is True:
        print('\n[*] Starting DNS brute force.')
        dns_force = dnssearch.DnsForce(word, dnsserver, verbose=True)
        hosts, ips = await dns_force.run()
        hosts = list({host for host in hosts if ':' in host})
        hosts.sort(key=lambda el: el.split(':')[0])
        print('\n[*] Hosts found after DNS brute force:')
        db = stash.StashManager()
        for host in hosts:
            print(host)
            full.append(host)
        await db.store_all(word, hosts, 'host', 'dns_bruteforce')

    # TakeOver Checking
    if takeover_status:
        print('\n[*] Performing subdomain takeover check')
        print('\n[*] Subdomain Takeover checking IS ACTIVE RECON')
        search_take = takeover.TakeOver(all_hosts)
        await search_take.process(proxy=use_proxy)

    # DNS reverse lookup
    dnsrev = []
    if dnslookup is True:
        print('\n[*] Starting active queries.')
        # load the reverse dns tools
        from theHarvester.discovery.dnssearch import (
            generate_postprocessing_callback, reverse_all_ips_in_range,
            serialize_ip_range)

        # reverse each iprange in a separate task
        __reverse_dns_tasks = {}
        for entry in host_ip:
            __ip_range = serialize_ip_range(ip=entry, netmask='24')
            if __ip_range and __ip_range not in set(
                    __reverse_dns_tasks.keys()):
                print('\n[*] Performing reverse lookup on ' + __ip_range)
                __reverse_dns_tasks[__ip_range] = asyncio.create_task(
                    reverse_all_ips_in_range(
                        iprange=__ip_range,
                        callback=generate_postprocessing_callback(
                            target=word,
                            local_results=dnsrev,
                            overall_results=full),
                        nameservers=list(map(str, dnsserver.split(',')))
                        if dnsserver else None))

        # run all the reversing tasks concurrently
        await asyncio.gather(*__reverse_dns_tasks.values())

        # Display the newly found hosts
        print('\n[*] Hosts found after reverse lookup (in target domain):')
        print('--------------------------------------------------------')
        for xh in dnsrev:
            print(xh)

    # DNS TLD expansion
    dnstldres = []
    if dnstld is True:
        print('[*] Starting DNS TLD expansion.')
        a = dnssearch.DnsTld(word, dnsserver, verbose=True)
        res = a.process()
        print('\n[*] Hosts found after DNS TLD expansion:')
        print('----------------------------------------')
        for y in res:
            print(y)
            dnstldres.append(y)
            if y not in full:
                full.append(y)

    # Virtual hosts search
    if virtual == 'basic':
        print('\n[*] Virtual hosts:')
        print('------------------')
        for data in host_ip:
            basic_search = bingsearch.SearchBing(data, limit, start)
            await basic_search.process_vhost()
            results = await basic_search.get_allhostnames()
            for result in results:
                result = re.sub(r'[[</?]*[\w]*>]*', '', result)
                result = re.sub('<', '', result)
                result = re.sub('>', '', result)
                print((data + '\t' + result))
                vhost.append(data + ':' + result)
                full.append(data + ':' + result)
        vhost = sorted(set(vhost))
    else:
        pass

    # Screenshots
    screenshot_tups = []
    if len(args.screenshot) > 0:
        import time
        from aiomultiprocess import Pool
        from theHarvester.screenshot.screenshot import ScreenShotter
        screen_shotter = ScreenShotter(args.screenshot)
        path_exists = screen_shotter.verify_path()
        # Verify the path exists; if it does not, create it, or skip screenshots if the user does not create it
        if path_exists:
            await screen_shotter.verify_installation()
            print(
                f'\nScreenshots can be found in: {screen_shotter.output}{screen_shotter.slash}'
            )
            start_time = time.perf_counter()
            print('Filtering domains for ones we can reach')
            unique_resolved_domains = {
                url.split(':')[0]
                for url in full if ':' in url and 'www.' not in url
            }
            if len(unique_resolved_domains) > 0:
                # First filter out ones that didn't resolve
                print(
                    'Attempting to visit unique resolved domains, this is ACTIVE RECON'
                )
                async with Pool(15) as pool:
                    results = await pool.map(screen_shotter.visit,
                                             list(unique_resolved_domains))
                    # Filter out domains that we couldn't connect to
                    unique_resolved_domains = list(
                        sorted({tup[0]
                                for tup in results if len(tup[1]) > 0}))
                async with Pool(3) as pool:
                    print(
                        f'Length of unique resolved domains: {len(unique_resolved_domains)} chunking now!\n'
                    )
                    # If you have the resources you could make the function faster by increasing the chunk number
                    chunk_number = 25
                    for chunk in screen_shotter.chunk_list(
                            unique_resolved_domains, chunk_number):
                        try:
                            screenshot_tups.extend(await pool.map(
                                screen_shotter.take_screenshot, chunk))
                        except Exception as ee:
                            print(
                                f'An exception has occurred while mapping: {ee}'
                            )
            end = time.perf_counter()
            # There is probably an easier way to do this
            total = end - start_time
            mon, sec = divmod(total, 60)
            hr, mon = divmod(mon, 60)
            total_time = "%02d:%02d" % (mon, sec)
            print(f'Finished taking screenshots in {total_time} seconds')
            print(
                '[+] Note there may be leftover chrome processes you may have to kill manually\n'
            )

    # Shodan
    shodanres = []
    if shodan is True:
        import texttable
        tab = texttable.Texttable()
        header = [
            'IP address', 'Hostname', 'Org', 'Services:Ports', 'Technologies'
        ]
        tab.header(header)
        tab.set_cols_align(['c', 'c', 'c', 'c', 'c'])
        tab.set_cols_valign(['m', 'm', 'm', 'm', 'm'])
        tab.set_chars(['-', '|', '+', '#'])
        tab.set_cols_width([15, 20, 15, 15, 18])
        print('\033[94m[*] Searching Shodan. \033[0m')
        try:
            for ip in host_ip:
                print(('\tSearching for ' + ip))
                shodan = shodansearch.SearchShodan()
                rowdata = await shodan.search_ip(ip)
                await asyncio.sleep(2)
                tab.add_row(rowdata)
            printedtable = tab.draw()
            print(printedtable)
        except Exception as e:
            print(f'\033[93m[!] An error occurred with Shodan: {e} \033[0m')
    else:
        pass

    # Here we need to add explosion mode.
    # We have to take out the TLDs to do this.
    if args.dns_tld is not False:
        counter = 0
        for word in vhost:
            search = googlesearch.SearchGoogle(word, limit, counter)
            await search.process(google_dorking)
            emails = await search.get_emails()
            hosts = await search.get_hostnames()
            print(emails)
            print(hosts)
    else:
        pass

    # Reporting
    if filename != "":
        try:
            print('\n[*] Reporting started.')
            db = stash.StashManager()
            scanboarddata = await db.getscanboarddata()
            latestscanresults = await db.getlatestscanresults(word)
            previousscanresults = await db.getlatestscanresults(
                word, previousday=True)
            latestscanchartdata = await db.latestscanchartdata(word)
            scanhistorydomain = await db.getscanhistorydomain(word)
            pluginscanstatistics = await db.getpluginscanstatistics()
            generator = statichtmlgenerator.HtmlGenerator(word)
            html_code = await generator.beginhtml()
            html_code += await generator.generatedashboardcode(scanboarddata)
            html_code += await generator.generatelatestscanresults(
                latestscanresults)
            if len(screenshot_tups) > 0:
                html_code += await generator.generatescreenshots(
                    screenshot_tups)
            html_code += await generator.generatepreviousscanresults(
                previousscanresults)
            graph = reportgraph.GraphGenerator(word)
            await graph.init_db()
            html_code += await graph.drawlatestscangraph(
                word, latestscanchartdata)
            html_code += await graph.drawscattergraphscanhistory(
                word, scanhistorydomain)
            html_code += await generator.generatepluginscanstatistics(
                pluginscanstatistics)
            html_code += '<p><span style="color: #000000;">Report generated on ' + str(
                datetime.datetime.now()) + '</span></p>'
            html_code += '''
               </body>
               </html>
               '''
        except Exception as e:
            print(e)
            print(
                '\n\033[93m[!] An error occurred while creating the output file.\n\n \033[0m'
            )
            sys.exit(1)

        html_file = open(
            f'{filename}.html' if '.html' not in filename else filename, 'w')
        html_file.write(html_code)
        html_file.close()
        print('[*] Reporting finished.')
        print('[*] Saving files.')

        try:
            filename = filename.rsplit('.', 1)[0] + '.xml'

            with open(filename, 'w+') as file:
                file.write(
                    '<?xml version="1.0" encoding="UTF-8"?><theHarvester>')
                for x in all_emails:
                    file.write('<email>' + x + '</email>')
                for x in full:
                    host, ip = x.split(':', 1) if ':' in x else (x, '')
                    if ip and len(ip) > 3:
                        file.write(
                            f'<host><ip>{ip}</ip><hostname>{host}</hostname></host>'
                        )
                    else:
                        file.write(f'<host>{host}</host>')
                for x in vhost:
                    host, ip = x.split(':', 1) if ':' in x else (x, '')
                    if ip and len(ip) > 3:
                        file.write(
                            f'<vhost><ip>{ip} </ip><hostname>{host}</hostname></vhost>'
                        )
                    else:
                        file.write(f'<vhost>{host}</vhost>')
                if shodanres != []:
                    shodanalysis = []
                    for x in shodanres:
                        res = x.split('SAPO')
                        file.write('<shodan>')
                        file.write('<host>' + res[0] + '</host>')
                        file.write('<port>' + res[2] + '</port>')
                        file.write('<banner><!--' + res[1] + '--></banner>')
                        reg_server = re.compile('Server:.*')
                        temp = reg_server.findall(res[1])
                        if temp:
                            shodanalysis.append(res[0] + ':' + temp[0])
                        file.write('</shodan>')
                    if shodanalysis:
                        shodanalysis = sorted(set(shodanalysis))
                        file.write('<servers>')
                        for x in shodanalysis:
                            file.write('<server>' + x + '</server>')
                        file.write('</servers>')

                file.write('</theHarvester>')

            print('[*] Files saved.')
        except Exception as er:
            print(
                f'\033[93m[!] An error occurred while saving the XML file: {er} \033[0m'
            )
        print('\n\n')
        sys.exit(0)