Ejemplo n.º 1
0
def searchGoogle():
    """
    Run the configured google dork and collect the result urls.

    Reads the (dork, page) pair from konf.target, stores the dork on
    konf.googleDork, requests one page of google results and extracts the
    target url from every <h3 class="r"> result heading.

    Returns:
        A list with the extracted urls, or None when nothing was extracted.

    Raises:
        W3bruteNextStepException: the response looks like a captcha
            challenge instead of a result page.
    """

    infoMsg = "[INFO] google dorking is running, please wait...\n"
    cetakData(infoMsg)

    dork, page = konf.target
    page = page if page > 1 else 1
    # store the dork back on the configuration
    konf.googleDork = dork

    resultsPerPage = 100

    data = {
        "q": dork,
        "num": resultsPerPage,
        "hl": "en",
        "complete": 0,
        "safe": "off",
        "filter": 0,
        "btnG": "search",
        # "start" is a zero-based result offset, not a page number:
        # page 1 -> 0, page 2 -> 100, ...
        "start": (page - 1) * resultsPerPage
    }

    url = "https://www.google.com/search?" + urllib.urlencode(data)
    response = UserAgent.open(url)
    htmltext = response.read()

    if re.search("(?i)captcha", htmltext):
        criMsg = "can't get dorking results. "
        criMsg += "captcha challenge detected"

        logger.critical(criMsg)
        raise W3bruteNextStepException

    soup = BeautifulSoup(htmltext)
    h3tags = soup.findAll("h3", attrs={"class": "r"})

    urls = []
    for tag in h3tags:
        anchor = tag.a
        if anchor is None:
            # heading without a link, nothing to extract
            continue

        href = anchor.get("href")
        if not href:
            continue

        # the target url is taken from the first query parameter of the
        # result link; links without a query string are skipped instead
        # of aborting the whole search with an IndexError
        params = urlparse.parse_qsl(urlparse.urlsplit(href).query)
        if params:
            urls.append(params[0][1])

    return urls or None
Ejemplo n.º 2
0
#!/usr/bin/env python
#coding=utf-8

from thirdparty.beautifulsoup.beautifulsoup import BeautifulSoup

# Sample document handed to the parser below.
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>
"""

# Parse the sample and print every <a> tag found in it.
soup = BeautifulSoup(html_doc)
anchors = soup.findAll('a')
print(anchors)