Exemple #1
0
def crawl(url, host):
    """Crawl ``url`` and spawn one thread per newly discovered link.

    The page is fetched through ``Requester.RequestUrl``; every match of the
    module-level ``link_re`` in the response body is considered a link.
    New links are recorded in the module-level ``result`` (resolved URLs)
    and ``dirs`` (raw link text) lists, printed, and then crawled in their
    own thread, which is also appended to the module-level ``threads`` list.

    Args:
        url: Address of the page to fetch; surrounding whitespace is ignored.
        host: Base address used to resolve links that do not already contain
            it. An empty string means "use ``url`` as the base".

    Returns:
        ``[]`` when the request fails or does not answer with status 200;
        ``None`` after a page has been processed. Discovered URLs are
        delivered through ``result``, not through the return value.
    """
    try:
        if host == "":
            host = url
        url = url.strip()
        # NOTE(review): '9050' looks like the Tor SOCKS port and 'no' like a
        # "use tor" switch -- confirm against Requester.RequestUrl.
        req = Requester.RequestUrl('9050', '', 'no', url)
        if req.status_code != 200:
            return []

        for link in link_re.findall(req.text):
            if link == url or link in dirs:
                continue
            # if "http" in link: continue

            # Links that do not mention the host are resolved against it.
            # (The original `host in l == False` was a chained comparison
            # that is always False, so urljoin was never applied.)
            uri = urljoin(host, link) if host not in link else link

            # Skip already-seen URLs and static assets / documents.
            is_asset = re.search(
                r'/([^/]+\.(?:jpg|gif|png|pdf|css|js|zip|doc|docx|rar))', link
            )
            if uri in result or is_asset:
                continue

            result.append(uri)
            print(uri)
            dirs.append(link)
            worker = threading.Thread(target=crawl, args=(uri, host,))
            threads.append(worker)
            try:
                try:
                    worker.start()
                    time.sleep(0.1)
                except Exception:
                    # Starting the thread failed (e.g. thread limit reached);
                    # back off briefly and carry on with the next link.
                    # KeyboardInterrupt is deliberately NOT caught here so the
                    # handler below can see it.
                    time.sleep(0.2)
            except (KeyboardInterrupt, SystemExit):
                print(Fore.RED, " [-] Ctrl-c received! Sending kill to threads...")
                for running in threads:
                    running.kill_received = True
                sys.exit()
    except Exception:
        # Best-effort crawl: any ordinary failure ends this branch quietly.
        # SystemExit / KeyboardInterrupt are allowed to propagate so that
        # sys.exit() above actually terminates.
        return []
Exemple #2
0
def _clean_result_href(href):
    """Reduce a Google result ``href`` to the bare target URL."""
    target = parse.unquote(href.replace("/url?q=", '').replace('&lang=en', ''))
    # str.replace returns a new string; the result has to be kept.
    for junk in ('&lang=en', 'http://www.google.com/url?url=', '&rct=j&q=&esrc=s'):
        target = target.replace(junk, '')
    # Everything from "&sa" onwards is dropped. The cut position is computed
    # on the cleaned string so that it always lines up with what is sliced.
    cut = target.find("&sa")
    return target[:cut] if cut > -1 else target


def getUrls(search_string , tor,port):
    """Run a Google search and return the result URLs found in ``<h3>`` tags.

    Args:
        search_string: Query passed to ``Requester.RequestUrl`` together with
            the Google search base URL.
        tor: Forwarded unchanged to ``Requester.RequestUrl``.
        port: Forwarded unchanged to ``Requester.RequestUrl``.

    Returns:
        A list of cleaned URLs (possibly empty), one per ``<h3>`` that
        contains a link with an ``href``; ``None`` if the response could not
        be parsed.
    """
    url = 'https://www.google.com/search?q='
    r = Requester.RequestUrl(port, search_string, tor, url)
    try:
        soup = BeautifulSoup(r.text, 'html.parser')
    except Exception:
        # Covers a missing/invalid response object as well as parser errors.
        return None

    temp = []
    for h3 in soup.find_all('h3'):
        try:
            temp.append(_clean_result_href(h3.find('a').get('href')))
        except (AttributeError, TypeError):
            # Heading without a link, or link without an href: skip it.
            continue
    return temp