def searchGoogle():
    """
    Run the configured google dork and collect the URLs of the results.

    Reads the ``(dork, page)`` pair from ``konf.target``, stores the dork in
    ``konf.googleDork``, fetches the google search page through
    ``UserAgent.open`` and takes the value of the first query-string
    parameter of every ``<h3 class="r">`` result link.

    Returns:
        A list of URL strings, or None when no usable result link was found.

    Raises:
        W3bruteNextStepException: if the response body contains the word
            "captcha" (case-insensitive).
    """
    infoMsg = "[INFO] google dorking is running, please wait...\n"
    cetakData(infoMsg)

    dork, page = konf.target
    # Anything not greater than 1 (0, negatives) is clamped to 1.
    page = page if page > 1 else 1
    # Store the dork back on the configuration object.
    konf.googleDork = dork

    # NOTE(review): `start` receives the page number as-is; google's `start`
    # looks like a result offset rather than a page index -- confirm.
    data = {
        "q": dork,
        "num": 100,
        "hl": "en",
        "complete": 0,
        "safe": "off",
        "filter": 0,
        "btnG": "search",
        "start": page
    }

    url = "https://www.google.com/search?" + urllib.urlencode(data)
    response = UserAgent.open(url)
    htmltext = response.read()

    if re.search("(?i)captcha", htmltext):
        criMsg = "can't get dorking results. "
        criMsg += "captcha challenge detected"
        logger.critical(criMsg)
        raise W3bruteNextStepException

    soup = BeautifulSoup(htmltext)
    h3tags = soup.findAll("h3", attrs={"class": "r"})

    urls = []
    for tag in h3tags:
        anchor = tag.a
        if anchor is None:
            # Result heading without a link: nothing to extract.
            continue

        href = anchor.get("href")
        if not href:
            continue

        # The target URL is carried as the first query-string value of the
        # result link; links without a query string are skipped instead of
        # aborting the whole search with an IndexError.
        params = urlparse.parse_qsl(urlparse.urlsplit(href).query)
        if not params:
            continue

        urls.append(params[0][1])

    return urls or None
#!/usr/bin/env python
#coding=utf-8
# Small demo script: parse a sample HTML document with the bundled
# BeautifulSoup and print every <a> tag found in it.

from thirdparty.beautifulsoup.beautifulsoup import BeautifulSoup

# Sample document used as parser input.
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>
"""

soup = BeautifulSoup(html_doc)

# To dump the whole parsed tree instead, print soup.prettify().
anchors = soup.findAll('a')
print(anchors)