class AltOnionDir:
    def __init__(self, host_db=None, user_db=None, password_db=None, database=None):
        self.host_db = host_db
        self.user_db = user_db
        self.password_db = password_db
        self.database_name = database
        self.database = DataBase(
            host_db=self.host_db,
            user_db=self.user_db,
            password_db=self.password_db,
            database=self.database_name,
        )
        self.source = 'Alt OnionDir'
        compare_source = self.database.compare_source(source=self.source)
        if not compare_source:
            self.database.save_source(source=self.source)
        self.logger = logging.getLogger('Class:AltOnionDir')
        self.session = requests.session()
        self.proxies = {
            'http': 'socks5h://localhost:9050',
        }

    @property
    def start(self):
        self.database.replaces()
        self.alt_onionDir()

    def alt_onionDir(self):
        url = 'http://onionf3ck2i74bmm.onion'
        self.logger.info(' Connecting to {url}'.format(url=url))
        request = self.session.get(url, proxies=self.proxies, timeout=1000)
        soup = BeautifulSoup(request.content, features="lxml")
        pages = []
        for raw in soup.find('navbar', {'id': 'content-navbar'}).findAll('a'):
            if '.html' in raw['href'].lower():
                pages.append("{url}/{page}".format(url=url, page=raw['href']))
        for urls in pages:
            try:
                request = self.session.get(urls, proxies=self.proxies, timeout=1000)
                soup = BeautifulSoup(request.content, features="lxml")
                next_pages = []
                for paginator in soup.find('ul', {'id': 'paginator'}).findAll('a'):
                    next_pages.append("{url}/{page}".format(
                        url=url, page=paginator['href'].replace('..', '')))
                for nextpage in next_pages:
                    self.logger.info(' Scraping {url}'.format(url=nextpage))
                    try:
                        request = self.session.get(nextpage,
                                                   proxies=self.proxies,
                                                   timeout=1000)
                        soup = BeautifulSoup(request.content, features="lxml")
                        list_urls = []
                        for raw in soup.find('div', {'class': 'generic-page'}).findAll('footer'):
                            for get_onion in raw.findAll('a'):
                                list_urls.append(get_onion['href'])
                        regex = re.compile(
                            r"[A-Za-z0-9]{0,12}\.?[A-Za-z0-9]{12,50}\.onion")
                        for lines in list_urls:
                            rurls = lines \
                                .replace('\xad', '') \
                                .replace('\n', '') \
                                .replace("http://", '') \
                                .replace("https://", '') \
                                .replace(r'\s', '') \
                                .replace('\t', '')
                            xurl = regex.match(rurls)
                            if xurl is not None:
                                self.database.saveonion(url=xurl.group(),
                                                        source=self.source)
                    except (requests.exceptions.ConnectionError,
                            requests.exceptions.ChunkedEncodingError,
                            requests.exceptions.ReadTimeout,
                            requests.exceptions.InvalidURL) as e:
                        self.logger.error(
                            ' Could not connect to the url because an error occurred.\n{e}'
                            .format(e=e))
            except (requests.exceptions.ConnectionError,
                    requests.exceptions.ChunkedEncodingError,
                    requests.exceptions.ReadTimeout,
                    requests.exceptions.InvalidURL) as e:
                self.logger.error(
                    ' Could not connect to the url because an error occurred.\n{e}'
                    .format(e=e))
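# Usage sketch (assumption, not part of the original module): every collector
# in this file follows the same pattern -- pass the database credentials to
# the constructor and access the `start` property to run the crawl. The
# values below are placeholders; Tor must be listening on localhost:9050 for
# the .onion collectors.
def _example_run_altoniondir():
    collector = AltOnionDir(
        host_db='localhost',   # hypothetical database credentials
        user_db='vigilant',
        password_db='secret',
        database='onions',
    )
    collector.start  # property access triggers AltOnionDir.alt_onionDir()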
class GistAPI:
    def __init__(self, host_db=None, user_db=None, password_db=None, database=None):
        self.host_db = host_db
        self.user_db = user_db
        self.password_db = password_db
        self.database_name = database
        self.database = DataBase(
            host_db=self.host_db,
            user_db=self.user_db,
            password_db=self.password_db,
            database=self.database_name,
        )
        self.logger = logging.getLogger('Class:GistAPI')

        # TODO: QuickStart
        logging.basicConfig(level=logging.INFO)

        self.source = 'gist'
        compare_source = self.database.compare_source(source=self.source)
        if not compare_source:
            self.database.save_source(source=self.source)

        # TODO: configuration file
        self.argument = '.onion'
        self.url = 'https://gist.github.com/search?l=Text&q='
        self.desktop_agents = [
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:60.0) Gecko/20100101 Firefox/60.0'
        ]

    # Picks a random user agent from the list.
    @property
    def random_headers(self):
        return {
            'User-Agent': choice(self.desktop_agents),
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
        }

    @property
    def start(self):
        # self.database.replaces()
        self.cookies()
        self.pagination()
        self.scraping()
        self.raw()

    def cookies(self):
        self.logger.info(' Starting scrape on Gist.')
        with requests.Session() as self.session:
            self.headers = self.random_headers
            request = self.session.get(self.url + self.argument, headers=self.headers)
            if request.status_code != 200:
                # Retry from the beginning if the first request fails.
                self.start

    def pagination(self):
        # URL-encodes the keyword.
        self.query = urllib.parse.quote(self.argument)
        full_url = self.url + self.argument
        self.logger.info(' Connecting to {}'.format(full_url))
        time.sleep(5)
        request = self.session.get(full_url, headers=self.headers)
        self.soup = BeautifulSoup(request.content, features="lxml")
        pages = []
        self.urls = [full_url]

        # Checks whether the search result has more than one page.
        try:
            for pagination in self.soup.find('div', {'class': 'pagination'}).findAll('a'):
                pages.append(pagination.get_text())
        except AttributeError:
            pages = False

        # If there is more than one result page, build a list with all of them.
        if pages:
            cont = 2
            while cont <= int(pages[-2]):
                cont += 1
                full_url = 'https://gist.github.com/search?l=Text&p={pagination}&q={query}'.format(
                    query=self.query, pagination=cont - 1)
                self.urls.append(full_url)

    def scraping(self):
        # Scrapes the result pages one URL at a time.
        url = []
        for inurl in self.urls:
            self.logger.info(' Connecting to {}'.format(inurl))
            time.sleep(5)
            request = self.session.get(inurl, headers=self.headers)
            if request.status_code == 200:
                soup = BeautifulSoup(request.content, features="lxml")
                for code in soup.findAll('div', {'class': 'gist-snippet'}):
                    if self.argument in code.get_text().lower():
                        for raw in code.findAll('a', {'class': 'link-overlay'}):
                            try:
                                url.append(raw['href'])
                            except KeyError:
                                pass

        self.urls_raw = []
        for get in url:
            self.logger.info(' Connecting to {}'.format(get))
            time.sleep(5)
            try:
                request = self.session.get(get, headers=self.headers)
                if request.status_code == 200:
                    soup = BeautifulSoup(request.content, features="lxml")
                    for raw in soup.findAll('a', {'class': 'btn btn-sm'}):
                        try:
                            gist_url = "{url}{gist}".format(
                                url="https://gist.githubusercontent.com",
                                gist=raw['href'])
                            self.urls_raw.append(gist_url)
                        except KeyError:
                            pass
            except (requests.exceptions.ConnectionError,
                    requests.exceptions.ChunkedEncodingError,
                    requests.exceptions.ReadTimeout,
                    requests.exceptions.InvalidURL) as e:
                self.logger.error(
                    ' Could not connect to the url because an error occurred.\n{e}'
                    .format(e=e))

    def raw(self):
        self.logger.info(' Applying replaces and regex. PLEASE WAIT...')
        itens = []
        for raw in self.urls_raw:
            if '.txt' in raw.lower():
                time.sleep(5)
                request = self.session.get(raw, headers=self.headers)
                self.soup = BeautifulSoup(request.content, features="lxml")
                for pre in self.soup.findAll('body'):
                    itens.extend(pre.get_text().split('\n'))

        regex = re.compile(r"[A-Za-z0-9]{0,12}\.?[A-Za-z0-9]{12,50}\.onion")
        for lines in itens:
            rurls = lines \
                .replace('\xad', '') \
                .replace('\n', '') \
                .replace("http://", '') \
                .replace("https://", '')
            url = regex.match(rurls)
            if url is not None:
                self.database.saveonion(url=url.group(), source=self.source)
class UnderDir:
    def __init__(self, host_db=None, user_db=None, password_db=None, database=None):
        self.host_db = host_db
        self.user_db = user_db
        self.password_db = password_db
        self.database_name = database
        self.database = DataBase(
            host_db=self.host_db,
            user_db=self.user_db,
            password_db=self.password_db,
            database=self.database_name,
        )
        self.source = 'UnderDir'
        logging.basicConfig(level=logging.INFO)
        compare_source = self.database.compare_source(source=self.source)
        if not compare_source:
            self.database.save_source(source=self.source)
        self.logger = logging.getLogger('Class:UnderDir')
        self.session = requests.session()
        self.proxies = {
            'http': 'socks5h://localhost:9050',
        }

    @property
    def start(self):
        self.database.replaces()
        self.underdir()

    def underdir(self):
        url = 'http://underdj5ziov3ic7.onion'
        self.logger.info(' Connecting to {url}'.format(url=url))
        request = self.session.get(url, proxies=self.proxies, timeout=1000)
        soup = BeautifulSoup(request.content, features="lxml")
        for raw in soup.find('div', {'id': 'incore'}).findAll('div', {'class': 'fr_m'}):
            for category in raw.findAll('a'):
                url_list = "{url}{category}".format(category=category['href'], url=url)
                self.logger.info(' Scraping {url}'.format(url=url_list))
                request = self.session.get(url_list, proxies=self.proxies, timeout=1000)
                soup = BeautifulSoup(request.content, features='lxml')
                pages = []
                for page_link in soup.find('div', {'class': 'pgn'}).findAll('a'):
                    pages.append(page_link.get_text())
                cont = 2
                urls = [url_list]
                while cont <= int(pages[-2]):
                    cont += 1
                    urls.append("{url}/pg/{number}".format(url=url_list, number=cont - 1))
                for get in urls:
                    self.logger.info(' Connecting to {url}.'.format(url=get))
                    try:
                        request = self.session.get(get, proxies=self.proxies, timeout=1000)
                        if request.status_code == 200:
                            soup = BeautifulSoup(request.content, features='lxml')
                            itens = []
                            for item in soup.find('div', {'class': 'trr'}).findAll('a'):
                                itens.append(item['href'].replace('http://', ''))
                            regex = re.compile(
                                r"[A-Za-z0-9]{0,12}\.?[A-Za-z0-9]{12,50}\.onion")
                            for lines in itens:
                                rurls = lines \
                                    .replace('\xad', '') \
                                    .replace('\n', '') \
                                    .replace("http://", '') \
                                    .replace("https://", '') \
                                    .replace(r'\s', '') \
                                    .replace('\t', '')
                                match = regex.match(rurls)
                                if match is not None:
                                    self.database.saveonion(url=match.group(),
                                                            source=self.source)
                    except (requests.exceptions.ConnectionError,
                            requests.exceptions.ChunkedEncodingError,
                            requests.exceptions.ReadTimeout,
                            requests.exceptions.InvalidURL) as e:
                        self.logger.error(
                            ' Could not connect to the url because an error occurred.\n{e}'
                            .format(e=e))
class GoogleAPI:
    def __init__(
            self,
            host_db=None,
            user_db=None,
            password_db=None,
            database=None,
            api_key=None,
            cse_id=None,
    ):
        self.host_db = host_db
        self.user_db = user_db
        self.password_db = password_db
        self.database_name = database
        self.api_key = api_key
        self.cse_id = cse_id
        self.database = DataBase(
            host_db=self.host_db,
            user_db=self.user_db,
            password_db=self.password_db,
            database=self.database_name,
        )
        self.logger = logging.getLogger('Class:GoogleAPI')
        self.source = 'Google'
        compare_source = self.database.compare_source(source=self.source)
        if not compare_source:
            self.database.save_source(source=self.source)
        self.session = requests.session()
        self.desktop_agents = [
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:60.0) Gecko/20100101 Firefox/60.0'
        ]

    @property
    def random_headers(self):
        return {
            'User-Agent': choice(self.desktop_agents),
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
        }

    @property
    def start(self):
        self.urls()

    def google_search(self, search_term, **kwargs):
        service = build("customsearch",
                        "v1",
                        developerKey=self.api_key,
                        cache_discovery=False)
        try:
            res = service.cse().list(q=search_term, cx=self.cse_id, **kwargs).execute()
            return res
        except Exception:
            return None

    def text(self, url=None):
        if url is not None:
            try:
                request_pages = self.session.get('{}'.format(url),
                                                 headers=self.random_headers,
                                                 timeout=500)
                if request_pages.status_code == 200:
                    soup = BeautifulSoup(request_pages.content, features="lxml")
                    # Strips scripts and styles so only the visible text remains.
                    for s in soup(['script', 'style']):
                        s.decompose()
                    return ' '.join(soup.stripped_strings)
            except (requests.exceptions.MissingSchema,
                    requests.exceptions.ConnectionError):
                return None

    def urls(self):
        search = self.google_search('intext:.onion', num=10, start=1)
        if search is not None:
            number_pages_search = int(
                search['queries']['request'][0]['totalResults']) // 10
            cont = 1
            while cont <= number_pages_search:
                cont += 1
                search = self.google_search('intext:.onion', num=10, start=cont)
                if search is not None:
                    for result in search:
                        if 'items' in result:
                            texts = []
                            for results in search[result]:
                                texts.append(self.text(url=results['formattedUrl']))
                            regex = re.compile(
                                r"[A-Za-z0-9]{0,12}\.?[A-Za-z0-9]{12,50}\.onion")
                            for lines in texts:
                                if lines is not None:
                                    for split_lines in lines.split(' '):
                                        replace_urls = split_lines \
                                            .replace('\xad', '') \
                                            .replace('\n', '') \
                                            .replace("http://", '') \
                                            .replace("https://", '') \
                                            .replace(r'\s', '') \
                                            .replace('\t', '')
                                        url = regex.match(replace_urls)
                                        if url is not None:
                                            self.database.saveonion(url=url.group(),
                                                                    source=self.source)
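# Usage sketch (assumption, not part of the original module): GoogleAPI also
# needs Google Custom Search credentials (api_key / cse_id) on top of the
# shared database arguments. All values below are placeholders.
def _example_run_googleapi():
    collector = GoogleAPI(
        host_db='localhost',             # hypothetical database credentials
        user_db='vigilant',
        password_db='secret',
        database='onions',
        api_key='YOUR_GOOGLE_API_KEY',   # Custom Search JSON API key
        cse_id='YOUR_CSE_ID',            # Custom Search Engine id
    )
    collector.start  # property access runs the search and saves matched onions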
class TORCH:
    def __init__(self, host_db=None, user_db=None, password_db=None, database=None):
        self.host_db = host_db
        self.user_db = user_db
        self.password_db = password_db
        self.database_name = database
        self.database = DataBase(
            host_db=self.host_db,
            user_db=self.user_db,
            password_db=self.password_db,
            database=self.database_name,
        )
        self.source = 'TORCH'
        logging.basicConfig(level=logging.INFO)
        compare_source = self.database.compare_source(source=self.source)
        if not compare_source:
            self.database.save_source(source=self.source)
        self.logger = logging.getLogger('Class:TORCH')
        self.session = requests.session()
        self.proxies = {
            'http': 'socks5h://localhost:9050',
        }
        self.url = 'http://xmh57jrzrnw6insl.onion'
        self.desktop_agents = [
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:60.0) Gecko/20100101 Firefox/60.0'
        ]

    # Picks a random user agent from the list.
    @property
    def random_headers(self):
        return {
            'User-Agent': choice(self.desktop_agents),
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
        }

    @property
    def start(self):
        self.pages()

    def pages(self):
        keywords = [
            'forum', 'press', 'search', 'introduction', 'arabic', 'chinese',
            'french', 'german', 'italian', 'japanese', 'polish', 'portuguese',
            'russians', 'Spanish', 'hardcore', 'softcore', 'erotica', 'fetish',
            'violence', 'escorts', 'p**n', 'domains', 'file', 'pastebin',
            'proxies', 'web', 'blog', 'books', 'bbs', 'chans', 'wiki', 'social',
            'Social', 'activism', 'paranormal', 'politics', 'religion',
            'whistleblowing', 'development', 'c++', 'c#', 'python', 'HTML',
            'ruby', 'jupyter', 'java', 'javascript', 'java', 'hacker',
            'blackbox', 'read', 'blackhat', 'cracked', 'wordlist', 'word',
            'hacked', 'blueteam', 'Phishing', 'Malware', 'Lamer', 'Cracker',
            'Defacer', 'Spyware', 'Scammers', 'DDOS', 'SQL', 'sql', 'Botnet',
            'Exploit', 'Script', 'zero', '0day', 'zeroday', 'Cybersecurity',
            'Cyber', 'Hacktivism', 'Hacktivist', 'Keylogger', 'Blacklist',
            'ai', 'bitcoin', 'Equifax', 'Nessus', 'openvas', 'securitycenter',
            'Truecrypt', 'ClamAV', 'OSSEC', 'paloalto', 'BackTrack', 'OSSIM',
            'IPCop', 'Okta', 'sonicwall', 'pfsense', 'Metasploit', 'OpenSSH',
            'Wireshark', 'NStealth', 'drugs', 'drug-shop', 'Acid', 'Asteroid',
            'Berry', 'Poker', 'games', 'Multiplayer', 'Play', 'activism',
            'Casino', '.mp3', '.mp4', 'Video', 'Filme', 'Movie', 'channel',
            'message', 'conclusion', 'termination', 'heading', 'headline',
            'english', 'mandarin', 'hindustani', 'arabic', 'malay', 'bengali',
            'sex', 'sexy', 'sexo', 'sexual', 'LGBT', 'Abuse', 'local', 'ebook',
            'ebooks', 'social', 'christianity', 'islam', 'nonreligious',
            'secular', 'secular', 'agnostic', 'atheist', 'hinduism',
            'buddhism', 'spiritism', 'judaism', 'primal-indigenous', 'php',
            'visual', 'C++', 'delphi', 'pascal', 'cobol', 'Cyberark',
            'Firewall', 'antivirus', 'marijuana', 'weed', 'cocaine', 'heroin',
            'cannabis', 'crack', 'ecstasy', 'amphetamines', 'lsd',
            'singleplayer', 'TV', 'television', 'radio',
        ]
        self.headers = self.random_headers
        self.logger.info(' Connecting to {}'.format(self.url))
        urls = []
        self.logger.info(' Generating URLs')
        for term in keywords:
            cont = 0
            while cont <= 9:
                cont += 1
                url_page = "{url}/4a1f6b371c/search.cgi?cmd=Search!&fmt=url&form=extended&GroupBySite=no&m=all&np={number}&ps=50&q={term}&sp=1&sy=1&type=&ul=&wf=2221&wm=wrd" \
                    .format(url=self.url, number=cont, term=term)
                urls.append(url_page)
        self.logger.info(' Connecting to the pages and collecting URLs. PLEASE WAIT...')
        for number_pages in urls:
            self.logger.debug(' Connecting to {}'.format(number_pages))
            try:
                request = self.session.get(number_pages,
                                           proxies=self.proxies,
                                           timeout=1000)
                if request.status_code == 200:
                    soup = BeautifulSoup(request.content, features="lxml")
                    regex = re.compile(
                        r"[A-Za-z0-9]{0,12}\.?[A-Za-z0-9]{12,50}\.onion")
                    for dt in soup.findAll('dt'):
                        for dt_a in dt.findAll('a'):
                            rurls = dt_a.get_text() \
                                .replace('\xad', '') \
                                .replace('\n', '') \
                                .replace("http://", '') \
                                .replace("https://", '') \
                                .replace(r'\s', '') \
                                .replace('\t', '')
                            full_url = regex.match(rurls)
                            if full_url is not None:
                                self.database.saveonion(url=full_url.group(),
                                                        source=self.source)
            except (requests.exceptions.ConnectionError,
                    requests.exceptions.ChunkedEncodingError,
                    requests.exceptions.ReadTimeout,
                    requests.exceptions.InvalidURL) as e:
                self.logger.error(
                    ' Could not connect to the url because an error occurred.\n{e}'
                    .format(e=e))
class CyberSecurityNews:
    def __init__(self, host_db=None, user_db=None, password_db=None, database=None):
        self.host_db = host_db
        self.user_db = user_db
        self.password_db = password_db
        self.database_name = database
        self.database = DataBase(
            host_db=self.host_db,
            user_db=self.user_db,
            password_db=self.password_db,
            database=self.database_name,
        )
        self.logger = logging.getLogger('Class:CyberSecurityNews')

        # TODO: QuickStart
        logging.basicConfig(level=logging.INFO)

        self.source = 'CyberSecurityNews-Pastebin'
        compare_source = self.database.compare_source(source=self.source)
        self.session = requests.session()
        if not compare_source:
            self.database.save_source(source=self.source)

        # TODO: configuration file
        self.argument = '.onion'
        self.url = 'https://pastebin.com/u/cybersecuritynews/1'
        self.desktop_agents = [
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:60.0) Gecko/20100101 Firefox/60.0'
        ]

    # Picks a random user agent from the list.
    @property
    def random_headers(self):
        return {
            'User-Agent': choice(self.desktop_agents),
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
        }

    @property
    def start(self):
        self.database.replaces()
        self.pages()

    def pages(self):
        self.headers = self.random_headers
        self.logger.info(' Connecting to {}'.format(self.url))
        time.sleep(2)
        request = self.session.get(self.url, headers=self.headers)
        if request.status_code == 200:
            soup = BeautifulSoup(request.content, features="lxml")
            pages_to_pages = []
            for raw in soup.find('div', {'class': 'pagination'}).findAll('a'):
                pages_to_pages.append(raw.get_text())
            cont = 2
            pages_urls = [self.url]
            while cont <= int(pages_to_pages[-2]):
                cont += 1
                pages_urls.append(
                    "https://pastebin.com/u/cybersecuritynews/{}".format(cont - 1))

            raw_urls = []
            for get_urls in pages_urls:
                self.logger.info(' Connecting to {}'.format(get_urls))
                request = self.session.get(get_urls, headers=self.headers)
                if request.status_code == 200:
                    soup = BeautifulSoup(request.content, features="lxml")
                    for raw in soup.find('table', {'class': 'maintable'}).findAll('a'):
                        if 'archive' not in raw['href']:
                            raw_urls.append(
                                "https://pastebin.com/raw{}".format(raw['href']))

            itens = []
            self.logger.info(' Applying replaces and regex. PLEASE WAIT...')
            for raw in raw_urls:
                request = self.session.get(raw, headers=self.headers)
                self.soup = BeautifulSoup(request.content, features="lxml")
                for pre in self.soup.findAll('body'):
                    itens.extend(pre.get_text().split('\n'))

            regex = re.compile(r"[A-Za-z0-9]{0,12}\.?[A-Za-z0-9]{12,50}\.onion")
            for lines in itens:
                rurls = lines \
                    .replace('\xad', '') \
                    .replace('\n', '') \
                    .replace("http://", '') \
                    .replace("https://", '') \
                    .replace(r'\s', '') \
                    .replace('\t', '')
                url = regex.match(rurls)
                if url is not None:
                    self.database.saveonion(url=url.group(), source=self.source)
class DiscoverDarkWebService:
    def __init__(self, host_db=None, user_db=None, password_db=None, database=None):
        self.host_db = host_db
        self.user_db = user_db
        self.password_db = password_db
        self.database_name = database
        self.database = DataBase(
            host_db=self.host_db,
            user_db=self.user_db,
            password_db=self.password_db,
            database=self.database_name,
        )
        self.source = 'Discover Dark Web Hidden Service'
        logging.basicConfig(level=logging.INFO)
        compare_source = self.database.compare_source(source=self.source)
        if not compare_source:
            self.database.save_source(source=self.source)
        self.logger = logging.getLogger('Class:DiscoverDarkWebService')
        self.session = requests.session()
        self.proxies = {
            'http': 'socks5h://localhost:9050',
        }

    @property
    def start(self):
        self.database.replaces()
        self.discover_dark_web()

    def discover_dark_web(self):
        url = 'http://3bbaaaccczcbdddz.onion/discover'
        self.logger.info(' Connecting to {url}'.format(url=url))
        # Initialized before the try block so the regex step below still
        # works when the request fails.
        list_urls = []
        try:
            request = self.session.get(url, proxies=self.proxies, timeout=1000)
            soup = BeautifulSoup(request.content, features="lxml")
            for raw in soup.find('table', {'class': 'table'}).findAll('a'):
                list_urls.append(raw['href'].replace('/search?q=', ''))
        except (requests.exceptions.ConnectionError,
                requests.exceptions.ChunkedEncodingError,
                requests.exceptions.ReadTimeout,
                requests.exceptions.InvalidURL) as e:
            self.logger.error(
                ' Could not connect to the url because an error occurred.\n{e}'.format(e=e))

        self.logger.info(' Applying REGEX. Please wait...')
        regex = re.compile(r"[A-Za-z0-9]{0,12}\.?[A-Za-z0-9]{12,50}\.onion")
        for lines in list_urls:
            rurls = lines \
                .replace('\xad', '') \
                .replace('\n', '') \
                .replace("http://", '') \
                .replace("https://", '') \
                .replace(r'\s', '') \
                .replace('\t', '')
            xurl = regex.match(rurls)
            if xurl is not None:
                self.database.saveonion(url=xurl.group(), source=self.source)
class FleshOnionsAPI:
    def __init__(self, host_db=None, user_db=None, password_db=None, database=None):
        self.host_db = host_db
        self.user_db = user_db
        self.password_db = password_db
        self.database_name = database
        self.database = DataBase(
            host_db=self.host_db,
            user_db=self.user_db,
            password_db=self.password_db,
            database=self.database_name,
        )
        self.source = 'FlashOnions'
        logging.basicConfig(level=logging.INFO)
        compare_source = self.database.compare_source(source=self.source)
        if not compare_source:
            self.database.save_source(source=self.source)
        self.logger = logging.getLogger('Class:FlashOnions')
        self.session = requests.session()
        self.proxies = {
            'http': 'socks5h://localhost:9050',
        }

    @property
    def start(self):
        self.database.replaces()
        self.flash_onion()

    def flash_onion(self):
        url = 'http://vps7nsnlz3n4ckiie5evi5oz2znes7p57gmrvundbmgat22luzd4z2id.onion/'
        self.logger.info(' Connecting to {url}'.format(url=url))
        try:
            request = self.session.get(url, proxies=self.proxies, timeout=1000)
            if request.status_code == 200:
                soup = BeautifulSoup(request.content, features="lxml")
                pages = []
                for number_pages in soup.find('div', {'class': 'pagination'}).findAll('a'):
                    pages.append(number_pages.get_text())
                cont = 0
                urls = []
                while cont <= int(pages[-1]):
                    cont += 1
                    urls.append(
                        "{url}?search_title_only=on&search=&rep=n%2Fa&page={number}".format(
                            number=cont - 1, url=url))

                onions = []
                for connect in urls:
                    time.sleep(4)
                    self.logger.info(' Connecting to {url}'.format(url=connect))
                    # Fetches the pagination URL that was just logged.
                    request = self.session.get(connect, proxies=self.proxies, timeout=1000)
                    if request.status_code == 200:
                        soup = BeautifulSoup(request.content, features="lxml")
                        for raw in soup.find('table', {'class': 'domain_list'}).findAll('a'):
                            if 'http://' in raw['href']:
                                onions.append(raw['href'])

                keywords = [
                    'forum', 'press', 'search', 'introduction', 'arabic', 'chinese',
                    'french', 'german', 'italian', 'japanese', 'polish', 'portuguese',
                    'russians', 'Spanish', 'hardcore', 'softcore', 'erotica', 'fetish',
                    'violence', 'escorts', 'p**n', 'domains', 'file', 'pastebin',
                    'proxies', 'web', 'blog', 'books', 'bbs', 'chans', 'wiki', 'social',
                    'Social', 'activism', 'paranormal', 'politics', 'religion',
                    'whistleblowing', 'development', 'c++', 'c#', 'python', 'HTML',
                    'ruby', 'jupyter', 'java', 'javascript', 'java', 'hacker',
                    'blackbox', 'read', 'blackhat', 'cracked', 'wordlist', 'word',
                    'hacked', 'blueteam', 'Phishing', 'Malware', 'Lamer', 'Cracker',
                    'Defacer', 'Spyware', 'Scammers', 'DDOS', 'SQL', 'sql', 'Botnet',
                    'Exploit', 'Script', 'zero', '0day', 'zeroday', 'Cybersecurity',
                    'Cyber', 'Hacktivism', 'Hacktivist', 'Keylogger', 'Blacklist',
                    'ai', 'bitcoin', 'Equifax', 'Nessus', 'openvas', 'securitycenter',
                    'Truecrypt', 'ClamAV', 'OSSEC', 'paloalto', 'BackTrack', 'OSSIM',
                    'IPCop', 'Okta', 'sonicwall', 'pfsense', 'Metasploit', 'OpenSSH',
                    'Wireshark', 'NStealth', 'drugs', 'drug-shop', 'Acid', 'Asteroid',
                    'Berry', 'Poker', 'games', 'Multiplayer', 'Play', 'activism',
                    'Casino', '.mp3', '.mp4', 'Video', 'Filme', 'Movie', 'channel',
                    'message', 'conclusion', 'termination', 'heading', 'headline',
                    'english', 'mandarin', 'hindustani', 'arabic', 'malay', 'bengali',
                    'sex', 'sexy', 'sexo', 'sexual', 'LGBT', 'Abuse', 'local', 'ebook',
                    'ebooks', 'social', 'christianity', 'islam', 'nonreligious',
                    'secular', 'secular', 'agnostic', 'atheist', 'hinduism',
                    'buddhism', 'spiritism', 'judaism', 'primal-indigenous', 'php',
                    'visual', 'C++', 'delphi', 'pascal', 'cobol', 'Cyberark',
                    'Firewall', 'antivirus', 'marijuana', 'weed', 'cocaine', 'heroin',
                    'cannabis', 'crack', 'ecstasy', 'amphetamines', 'lsd',
                    'singleplayer', 'TV', 'television', 'radio',
                ]
                for term in keywords:
                    time.sleep(2)
                    query = urllib.parse.quote(term)
                    search = "{url}/?rep=n%2Fa&search={term}&submit=Go+%3E%3E%3E".format(
                        url=url, term=query)
                    self.logger.info(' Connecting to {url}'.format(url=search))
                    # Fetches the search URL that was just logged.
                    request = self.session.get(search, proxies=self.proxies, timeout=1000)
                    if request.status_code == 200:
                        soup = BeautifulSoup(request.content, features="lxml")
                        for raw in soup.find('table', {'class': 'domain_list'}).findAll('a'):
                            if 'http://' in raw['href']:
                                onions.append(raw['href'])

                self.logger.info(' Applying REGEX. Please wait...')
                regex = re.compile(r"[A-Za-z0-9]{0,12}\.?[A-Za-z0-9]{12,50}\.onion")
                for lines in onions:
                    rurls = lines \
                        .replace('\xad', '') \
                        .replace('\n', '') \
                        .replace("http://", '') \
                        .replace("https://", '') \
                        .replace(r'\s', '') \
                        .replace('\t', '')
                    xurl = regex.match(rurls)
                    if xurl is not None:
                        self.database.saveonion(url=xurl.group(), source=self.source)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.ChunkedEncodingError,
                requests.exceptions.ReadTimeout,
                requests.exceptions.InvalidURL) as e:
            self.logger.error(
                ' Could not connect to the url because an error occurred.\n{e}'.format(e=e))
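# Minimal end-to-end sketch (assumption, not part of the original module):
# run every collector in sequence against the same database. Guarded so it
# only executes when the module is run directly; credentials are placeholders.
if __name__ == '__main__':
    _db_args = dict(host_db='localhost', user_db='vigilant',
                    password_db='secret', database='onions')
    for _collector_cls in (AltOnionDir, GistAPI, UnderDir, TORCH,
                           CyberSecurityNews, DiscoverDarkWebService,
                           FleshOnionsAPI):
        _collector_cls(**_db_args).start  # each crawl persists its own results
    GoogleAPI(api_key='YOUR_GOOGLE_API_KEY', cse_id='YOUR_CSE_ID',
              **_db_args).start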