import re
import sys
import threading
import time
from urllib import parse
from urllib.parse import urljoin

from bs4 import BeautifulSoup
from colorama import Fore

import Requester


def crawl(url, host):
    try:
        if host == "":
            host = url
        req = Requester.RequestUrl('9050', '', 'no', url.strip())
        if req.status_code != 200:
            return []
        links = link_re.findall(req.text)
        url = url.strip()
        for l in links:
            # Skip links to static assets and documents
            exp = re.findall(r'/([^/]+\.(?:jpg|gif|png|pdf|css|js|zip|doc|docx|rar))', l)
            if l == url or l in dirs:
                continue
            # if "http" in l: continue
            # Resolve relative links against the host; keep absolute ones as-is
            if host not in l:
                uri = urljoin(host, l)
            else:
                uri = l
            if uri in result or len(exp) > 0:
                continue
            result.append(uri)
            print(uri)
            dirs.append(l)
            # Recurse into each discovered URI on its own thread
            t = threading.Thread(target=crawl, args=(uri, host))
            threads.append(t)
            try:
                try:
                    t.start()
                    time.sleep(0.1)
                except Exception:
                    time.sleep(0.2)
            except (KeyboardInterrupt, SystemExit):
                print(Fore.RED, " [-] Ctrl-c received! Sending kill to threads...")
                for t in threads:
                    t.kill_received = True
                sys.exit()
    except Exception:
        return []
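# crawl() relies on module-level state defined elsewhere in this file:
# a compiled `link_re` pattern plus the shared `result`, `dirs`, and
# `threads` lists. A minimal sketch of what that state could look like;
# the href pattern below is an assumption, not the repo's actual regex:
link_re = re.compile(r'href=["\'](.*?)["\']')  # assumed link-extraction pattern
result = []   # every URI discovered so far
dirs = []     # raw link values already queued, to avoid re-crawling
threads = []  # live crawler threads, kept so Ctrl-C can signal them

# Hypothetical entry point: seed the crawl with a single URL and let host
# default to that URL.
# crawl('http://example.onion/', '')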
def getUrls(search_string, tor, port):
    temp = []
    url = 'https://www.google.com/search?q='
    r = Requester.RequestUrl(port, search_string, tor, url)
    try:
        soup = BeautifulSoup(r.text, 'html.parser')
    except Exception:
        return temp
    h3tags = soup.find_all('h3')
    if len(h3tags) > 0:
        for h3 in h3tags:
            try:
                href = h3.find('a').get('href').replace('/url?q=', '')
                pos = parse.unquote(href).find('&sa')
                # str.replace returns a new string, so its result must be kept
                ut = parse.unquote(href).replace('&lang=en', '')
                if ut.find('http://www.google.com/url?url=') > -1:
                    ut = ut.replace('http://www.google.com/url?url=', '')
                if ut.find('&rct=j&q=&esrc=s') > -1:
                    ut = ut.replace('&rct=j&q=&esrc=s', '')
                if pos > -1:
                    # Trim everything from the '&sa' tracking parameter onward
                    temp.append(ut[:pos])
                else:
                    temp.append(ut)
            except Exception:
                # print("[-] there is a problem")
                continue
    return temp
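# Usage sketch for getUrls(): argument order is (search_string, tor, port).
# The tor flag and SOCKS port mirror the values crawl() passes to
# Requester.RequestUrl ('no' and '9050'); the query itself is an
# illustrative assumption:
#
#     urls = getUrls('inurl:admin site:example.com', 'no', '9050')
#     for u in urls:
#         crawl(u, '')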