def getMails(soup, save=0):
    """Print, and optionally save, the e-mail addresses linked from a page.

    Every ``<a>`` tag of *soup* is inspected; hrefs using the ``mailto:``
    scheme contribute the text following the scheme to the result list.
    The list is printed and, when *save* is truthy, handed to ``saveJson``.

    Args:
        soup: Parsed page; must be a ``bs4.BeautifulSoup`` instance.
        save: When truthy, the addresses are passed to
            ``saveJson("Extracted-Mail-IDs", emails)``.

    Returns:
        An empty string (kept unchanged for existing callers).

    Raises:
        TypeError: If *soup* is not a ``bs4.BeautifulSoup`` instance.
    """
    # The previous check, isinstance(type(soup), type(bs4.BeautifulSoup)),
    # asked "is a class an instance of `type`?" and was therefore always
    # true; test the object itself so the guard can actually fire.
    if not isinstance(soup, bs4.BeautifulSoup):
        # Raising a bare string is itself a TypeError in Python 3; raise a
        # real exception carrying the intended message instead.
        raise TypeError(Bcolors.FAIL +
                        'Method parameter is not of instance bs4.BeautifulSoup' +
                        Bcolors.ENDC)

    scheme = 'mailto:'
    emails = []
    for link in soup.find_all('a'):
        href = link.get('href')
        if href is None:
            continue
        href = href.strip()
        # Require the scheme at the start of the href: a substring test
        # also matched ordinary URLs that merely contain "mailto", and
        # split(':')[1] raised IndexError when no colon was present.
        if href[:len(scheme)].lower() == scheme:
            emails.append(href[len(scheme):])

    # Pretty print output as below
    print('')
    print(Bcolors.OKGREEN + 'Mails Found - ' + Bcolors.ENDC + str(len(emails)))
    print('-------------------------------')
    for mail in emails:
        print(mail)
    if save:
        saveJson("Extracted-Mail-IDs", emails)
    return ''
# main(): command-line entry point (its own docstring calls it "TorBot's Core").
#
# NOTE(review): this definition is collapsed onto one physical line and is not
# valid Python as written. Several `if` headers have lost their condition and
# colon -- `if saveJson('Emails', node.emails)`, `if execute_all(node.uri)`,
# `if print('Nothing to save.\n')`, `if saveJson("Links", node.links)` -- and
# `if tree = LinkTree(node)` has an assignment where a condition is expected.
# The missing conditions cannot be recovered from this text; restore the block
# from version control rather than guessing.
#
# Flow that is still visible: get_args() -> connect(args.ip, args.port) ->
# the version/update flags each run one action and exit -> header() unless
# args.quiet -> with args.url, build LinkNode(args.url), call
# LinkIO.display_ip() and act on args.mail / args.visualize / args.depth;
# without args.url a usage hint is printed.
#
# NOTE(review): the two leading print(...) calls dump args.depth and its type
# and look like leftover debugging output -- confirm before keeping.
# NOTE(review): `except (...) as err: raise err` re-raises the same exception,
# and `result_emails` is assigned but never read in the visible text.
def main(): """ TorBot's Core """ args = get_args() connect(args.ip, args.port) print("--------------------------------", type(args.depth)) print("--------------------------------", args.depth) # If flag is -v, --update, -q/--quiet then user only runs that operation # because these are single flags only if args.version: print("TorBot Version:" + __VERSION) exit() if args.update: updateTor() exit() if not args.quiet: header() # If url flag is set then check for accompanying flag set. Only one # additional flag can be set with -u/--url flag if args.url: try: node = LinkNode(args.url) except (ValueError, HTTPError, ConnectionError) as err: raise err LinkIO.display_ip() # -m/--mail if args.mail: print(node.emails) result_emails = {'emails':node.emails} if saveJson('Emails', node.emails) # -i/--info if execute_all(node.uri) if print('Nothing to save.\n') if args.visualize: if args.depth: tree = LinkTree(node, stop_depth=args.depth) else: tree = LinkTree(node) if tree = LinkTree(node) file_name = str(input("File Name (.pdf/.png/.svg): ")) else: '''print("--------------------------------", type(node.links)) print("--------------------------------", node.links)''' LinkIO.display_children(node) print(LinkIO.results) if saveJson("Links", node.links) else: print("usage: See -h for possible arguments.") print("\n\n")
def getLinks(soup, ext, live=0, save=0):
    """Collect the HTTP(S) links of a page, optionally filtered by suffix.

    Every ``<a>`` tag of *soup* whose href contains ``http`` is kept; when
    *ext* is non-empty only hrefs ending with one of its suffixes are kept.
    The number of links found is always printed.

    Args:
        soup: Parsed page; must be a ``bs4.BeautifulSoup`` instance.
        ext: Iterable of suffix strings (a single string is accepted as
            one suffix); falsy means "no filtering".
        live: When truthy, one thread per link runs
            ``link_status(link, result, index)`` instead of printing the
            links. Presumably each call fills ``result[index]`` -- confirm
            against ``link_status``.
        save: When truthy, the outcome is passed to ``saveJson`` under
            ``"Live-Onion-Links"`` (live mode) or ``"Onion-Links"``.

    Returns:
        The list of collected link strings.

    Raises:
        TypeError: If *soup* is not a ``bs4.BeautifulSoup`` instance.
    """
    # The previous check, isinstance(type(soup), type(bs4.BeautifulSoup)),
    # was always true; test the object itself so the guard can fire.
    if not isinstance(soup, bs4.BeautifulSoup):
        # A bare string cannot be raised in Python 3; use a real exception.
        raise TypeError('Method parameter is not of instance bs4.BeautifulSoup')

    if not ext:
        suffixes = ()
    elif isinstance(ext, str):
        # Iterating a str would treat every character as its own suffix.
        suffixes = (ext,)
    else:
        suffixes = tuple(ext)

    websites = []
    for link in soup.find_all('a'):
        web_link = link.get('href')
        # 'https' always contains 'http', so a single test is enough.
        if web_link is None or 'http' not in web_link:
            continue
        # endswith(tuple) adds a link once even when several suffixes
        # match; the per-suffix loop appended it once per match.
        if not suffixes or web_link.endswith(suffixes):
            websites.append(web_link)

    # Pretty print output as below
    print('')
    print(Bcolors.OKGREEN + 'Websites Found - ' + Bcolors.ENDC + str(len(websites)))
    print('-------------------------------')

    if live:
        result = [{} for _ in websites]
        threads = []
        # enumerate gives each link its own slot; websites.index(web) was
        # O(n) per link and sent duplicate URLs to the same slot.
        for index, web in enumerate(websites):
            worker = threading.Thread(target=link_status,
                                      args=(web, result, index))
            worker.start()
            threads.append(worker)
        try:
            for worker in threads:
                worker.join()
            if save:
                saveJson("Live-Onion-Links", result)
        except (Exception, KeyboardInterrupt):
            # NOTE(review): deliberately best-effort, as in the original
            # bare `except: pass` -- an interrupted join or a failed save
            # must not prevent the links from being returned. Consider
            # logging the error here.
            pass
    else:
        for web in websites:
            print(web)
        if save:
            saveJson("Onion-Links", websites)
    return websites
def test_save_links_successful():
    """Check that ``savefile.saveJson('Links', ...)`` writes the expected JSON.

    The file produced by ``saveJson`` is compared, after parsing, with a
    reference file holding ``{'Links': mock_data}`` dumped with
    ``json.dump``. Both files are removed afterwards.
    """
    mock_data = [
        '',
        '',
        '',
        ''
    ]
    reference_name = 'test_file.json'
    # Bound before the try so the cleanup below cannot hit a NameError
    # when saveJson itself raises.
    file_name = None
    try:
        file_name = savefile.saveJson('Links', mock_data)
        mock_output = {'Links': mock_data}
        with open(reference_name, 'w+') as test_file:
            json.dump(mock_output, test_file, indent=2)

        assert os.path.isfile(file_name) is True

        # NOTE(review): the right-hand sides of the two loads were missing
        # from the source text; json.load is the reconstruction implied by
        # the files opened for reading and the final equality check.
        # `with` also closes both handles, which were previously leaked.
        with open(file_name, 'r') as mock_file:
            saved_data = json.load(mock_file)
        with open(reference_name, 'r') as test_file:
            test_data = json.load(test_file)
    finally:
        # Remove only what was actually created, so a failure above is
        # not masked by a secondary error during cleanup.
        if file_name is not None and os.path.isfile(file_name):
            os.remove(file_name)
        if os.path.isfile(reference_name):
            os.remove(reference_name)

    assert saved_data == test_data
def main():
    """Run TorBot from the command line.

    Reads the parsed arguments from ``get_args()``, opens the connection
    with ``connect(args.ip, args.port)`` and then performs exactly one
    action: show the version, update, or -- when a URL is given -- fetch
    the first page and extract e-mails, site info, or links from it.

    NOTE(review): the ``args.save`` / ``args.info`` conditions and the
    ``live=args.live`` argument were missing from the source text (bare
    ``if``/``elif`` headers and a doubled comma). They are reconstructed
    from the ``[-s]``, ``[-i]`` and ``[-l]`` entries of the usage string
    and the ``# -i/--info`` comment -- confirm against ``get_args()``.
    """
    args = get_args()
    connect(args.ip, args.port)
    link = args.url

    # If flag is -v, --update, -q/--quiet then user only runs that operation
    # because these are single flags only
    if args.version:
        print("TorBot Version:" + __VERSION)
        exit()
    if args.update:
        updater.updateTor()
        exit()
    if not args.quiet:
        header()

    # If url flag is set then check for accompanying flag set. Only one
    # additional flag can be set with -u/--url flag
    if args.url:
        print("Tor IP Address :", pagereader.get_ip())
        html_content, response = pagereader.read_first_page(link)
        print("Connection successful.")
        # -m/--mail
        if args.mail:
            emails = getemails.getMails(html_content)
            print(emails)
            if args.save:
                savefile.saveJson('Emails', emails)
        # -i/--info
        elif args.info:
            info.executeAll(link, html_content, response)
            if args.save:
                print('Nothing to save.\n')
        else:
            links = getweblinks.get_links(soup=html_content,
                                          live=args.live,
                                          ext=args.extension)
            if args.save:
                savefile.saveJson("Links", links)
    else:
        print("usage: [-h] [-v] [--update] [-q] [-u URL] [-s] [-m]",
              "[-e EXTENSION] [-l] [-i]")

    print("\n\n")
def main(conn=False):
    """Parse the command line and run the requested TorBot action.

    Args:
        conn: When truthy, ``connect(LOCALHOST, PORT)`` is called before
            the arguments are parsed.

    Exactly one action runs: show the version, update, or -- when a URL
    is given -- read the page and extract e-mails, site info, or links.

    NOTE(review): the ``args.save`` / ``args.info`` conditions and the
    ``live=args.live`` argument were missing from the source text (bare
    ``if``/``elif`` headers and a doubled comma). They are reconstructed
    from the ``--save``, ``--info`` and ``--live`` options defined below
    -- confirm against version control.
    """
    if conn:
        connect(LOCALHOST, PORT)

    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--version",
                        action="store_true",
                        help="Show current version of TorBot.")
    parser.add_argument("--update",
                        action="store_true",
                        help="Update TorBot to the latest stable version")
    parser.add_argument("-q", "--quiet",
                        action="store_true")
    parser.add_argument("-u", "--url",
                        help="Specifiy a website link to crawl")
    parser.add_argument("-s", "--save",
                        action="store_true",
                        help="Save results in a file")
    parser.add_argument("-m", "--mail",
                        action="store_true",
                        help="Get e-mail addresses from the crawled sites")
    parser.add_argument("-e", "--extension",
                        action='append',
                        dest='extension',
                        default=[],
                        help=' '.join(
                            ("Specifiy additional website extensions",
                             "to the list(.com , .org etc)")))
    parser.add_argument("-l", "--live",
                        action="store_true",
                        help="Check if websites are live or not (slow)")
    parser.add_argument("-i", "--info",
                        action="store_true",
                        help=' '.join(("Info displays basic info of the",
                                       "scanned site, (very slow)")))
    args = parser.parse_args()

    link = args.url

    # If flag is -v, --update, -q/--quiet then user only runs that operation
    # because these are single flags only
    if args.version:
        print("TorBot Version:" + __VERSION)
        exit()
    if args.update:
        updater.updateTor()
        exit()
    if not args.quiet:
        header()

    # If url flag is set then check for accompanying flag set. Only one
    # additional flag can be set with -u/--url flag
    if args.url:
        print("Tor IP Address :", pagereader.get_ip())
        html_content = pagereader.readPage(link, args.extension)
        # -m/--mail
        if args.mail:
            emails = getemails.getMails(html_content)
            print(emails)
            if args.save:
                savefile.saveJson('Emails', emails)
        # -i/--info
        elif args.info:
            info.executeAll(link)
            if args.save:
                print('Nothing to save.\n')
        else:
            links = getweblinks.getLinks(soup=html_content,
                                         live=args.live,
                                         ext=args.extension)
            if args.save:
                savefile.saveJson("Links", links)
    else:
        print(
            "usage: [-h] [-v] [--update] [-q] [-u URL] [-s] [-m] [-e EXTENSION] [-l] [-i]"
        )

    print("\n\n")