Ejemplo n.º 1
0
def html_parsing():
    """Download the Debian stable release page and print several lxml lookups.

    The first group of prints navigates the parsed tree with the element
    API (iteration, ``find``, ``findall``); the second group evaluates
    XPath expressions against the same tree. Nothing is returned.
    """
    page = requests.get('https://www.debian.org/releases/stable')
    tree = HTML(page.content)

    # Element API: tags of the root's children, the <title> text, and the
    # text of the first <p> inside the second <div> directly under <body>.
    child_tags = [child.tag for child in tree]
    print(child_tags)
    title = tree.find('head').find('title')
    print(title.text)
    body_divs = tree.find('body').findall('div')
    print(body_divs[1].find('p').text)

    # XPath evaluated from the root element.
    for expression in ('body', 'body/div', '//h1'):
        print(tree.xpath(expression))

    # XPath evaluated relative to <head> instead of the root.
    head = tree.find('head')
    print(head.xpath('.//h1'))

    # Predicates: by attribute value, by child element, by position.
    for expression in ('//div[@id="content"]', '//div[h1]', 'body/div[2]'):
        print(tree.xpath(expression))
Ejemplo n.º 2
0
def edit_message(base_url, username, password, message_id, new_body):
    """Log in, read a security token from the FAQ page and submit an edited message.

    Args:
        base_url: URL against which ``faq.php`` and ``misc.php`` are resolved.
        username: Passed to ``_utils.login_and_go_to_faq``.
        password: Passed to ``_utils.login_and_go_to_faq``.
        message_id: Value sent as the ``id`` field of the edit request.
        new_body: New message text; run through ``encode_outgoing_message``
            before being sent as the ``vsacb_editmessage`` field.

    Progress is reported with ``print``; nothing is returned.
    """
    opener = _utils.login_and_go_to_faq(base_url, username, password)

    # Endpoints used below.
    faq_url = urljoin(base_url, "faq.php")
    edit_url = urljoin(base_url, "misc.php")

    # The FAQ page is used only as a source of the hidden security token
    # (the original author notes it has low backend complexity).
    print("fetching security token")
    faq_page = HTML(opener.open(faq_url).read())
    security_token = faq_page.find(".//input[@name='securitytoken']").attrib["value"]

    # Build the form body as text, then convert it to bytes for the POST.
    payload_template = \
        "do=vsacb_editmessage&s=&securitytoken={0}&id={1}&vsacb_editmessage={2}"
    encoded_body = encode_outgoing_message(new_body)
    payload_text = payload_template.format(security_token, message_id, encoded_body)
    payload = payload_text.encode(server_encoding)

    print("updating message")
    opener.open(edit_url, data=payload).read()

    print("done")
Ejemplo n.º 3
0
def link_tag_url(html):
    """Return the ``href`` of the document's shortcut-icon ``<link>`` tag.

    The tag looked for has the form

        <link rel="shortcut icon" href="images-template/favicon.ico" type="image/x-icon" />

    The ``href`` value is returned exactly as written in the document.
    ``None`` is returned when no such tag exists or its ``href`` is
    missing or empty.
    """
    from lxml.etree import HTML

    tree = HTML(html)
    icon_link = tree.find('.//link[@rel="shortcut icon"]')
    if icon_link is None:
        return None

    href = icon_link.get('href', '')
    if not href:
        return None
    return href
Ejemplo n.º 4
0
def link_tag_url(html):
    """Look up the favicon URL declared by an HTML document.

    Searches the parsed document for the first tag of the form

        <link rel="shortcut icon" href="images-template/favicon.ico" type="image/x-icon" />

    and returns its ``href`` attribute unchanged. Returns ``None`` if the
    tag is absent or has no non-empty ``href``.
    """
    from lxml.etree import HTML

    parsed = HTML(html)
    shortcut_icon = parsed.find('.//link[@rel="shortcut icon"]')
    # An absent tag is treated the same as a tag with an empty href.
    href = '' if shortcut_icon is None else shortcut_icon.get('href', '')
    return href or None
Ejemplo n.º 5
0
def fake(base_url, username, password, game_id, time, score, game_name=None):
    """Open a game page, wait, then post a score for that game.

    Args:
        base_url: URL against which the arcade endpoints are resolved.
        username: Passed to ``_utils.login_and_enter_arcade``.
        password: Passed to ``_utils.login_and_enter_arcade``.
        game_id: Value of the ``gameid`` query parameter of the play page.
        time: Argument handed to ``sleep`` before the score is submitted
            (presumably seconds; confirm against the imported ``sleep``).
        score: Value sent as the ``gscore`` field.
        game_name: Value sent as the ``gname`` field. When ``None`` it is
            read from the ``flashvars`` of the Flash ``<embed>`` on the
            play page.

    Progress is reported with ``print``. The function returns early,
    without submitting anything, when the game name cannot be determined.
    """
    opener = _utils.login_and_enter_arcade(base_url, username, password)

    # Endpoints used below.
    play_game_url = urljoin(base_url, "arcade.php?do=play&gameid={0}".format(game_id))
    score_url = urljoin(base_url, "index.php?act=Arcade&do=newscore")

    # Load the play page, as a real player would before scoring.
    print("playing the game")
    game_page = HTML(opener.open(play_game_url).read())

    if game_name is None:
        # Recover the name from the Flash embed's "gamename=" variable.
        embed = game_page.find(".//embed[@type='application/x-shockwave-flash']")
        if embed is None:
            print("didn't find the flash plugin on the game page :'-(")
            return

        prefix = "gamename="
        candidates = [
            pair[len(prefix):]
            for pair in embed.attrib['flashvars'].split("&")
            if pair.startswith(prefix)
        ]
        if candidates:
            # If the variable occurs more than once, the last one wins.
            game_name = candidates[-1]

    if game_name is None:
        print("game name not found :'-(")
        return

    # Pause for the requested duration before reporting the score.
    print("waiting")
    sleep(time)

    post_data = _utils.encode_post_data({"gscore": score, "gname": game_name})
    print("submitting fake score")
    opener.open(score_url, data=post_data).read()

    print("done")
Ejemplo n.º 6
0
# screen scraping
import re
import requests
from lxml.etree import HTML

# Download and parse the Debian stable release page.
response = requests.get('https://www.debian.org/releases/stable')
root = HTML(response.content)

# The codename is the text between the curly quotes in the <title>.
title_element = root.find('head').find('title')
title_text = title_element.text
quoted_name = re.search('\u201c(.*)\u201d', title_text)
release = quoted_name.group(1)

# The version is the second whitespace-separated word of the first <p>
# inside <div id="content">.
leading_paragraphs = root.xpath('//div[@id="content"]/p[1]')
p_text = leading_paragraphs[0].text
words = p_text.split()
version = words[1]

report_template = 'Codename: {}\nVersion: {}\n'
print(report_template.format(release, version))

Ejemplo n.º 7
0
def fake(base_url, username, password, game_id, time, score, tourney_id, game_name=None, rung=None,
         face_off=None):
    """Visit a tournament, open its game page, wait, then post a score.

    Args:
        base_url: URL against which the arcade endpoints are resolved.
        username: Passed to ``_utils.login_and_enter_arcade``.
        password: Passed to ``_utils.login_and_enter_arcade``.
        game_id: Value of the ``gameid`` query parameter of the play page.
        time: Argument handed to ``sleep`` before the score is submitted
            (presumably seconds; confirm against the imported ``sleep``).
        score: Value sent as the ``gscore`` field.
        tourney_id: Value of the ``tid`` query parameter.
        game_name: Value sent as the ``gname`` field. When ``None`` it is
            read from the ``flashvars`` of the Flash ``<embed>`` on the
            play page.
        rung: Optional; adds ``&rung=...`` to the play URL when given.
        face_off: Optional; adds ``&faceoff=...`` to the play URL when given.

    Progress is reported with ``print``. The function returns early,
    without submitting anything, when the game name cannot be determined.
    """
    opener = _utils.login_and_enter_arcade(base_url, username, password)

    # Endpoints used below.
    tourneys_url = urljoin(base_url, "arcade.php?&do=viewtournaments")
    view_tourney_url = urljoin(
        base_url,
        "arcade.php?&act=Arcade&do=viewtourney&tid={0}".format(tourney_id),
    )
    # The optional query parameters collapse to an empty string when unset.
    rung_part = "&rung={0}".format(rung) if rung is not None else ""
    face_off_part = "&faceoff={0}".format(face_off) if face_off is not None else ""
    play_query = "arcade.php?&do=playtourney&gameid={0}&tid={1}{2}{3}".format(
        game_id, tourney_id, rung_part, face_off_part
    )
    play_tourney_game_url = urljoin(base_url, play_query)
    score_url = urljoin(base_url, "index.php?act=Arcade&do=newscore")

    # Open the tournament list; the response body is read and discarded.
    print("entering tourneys page")
    opener.open(tourneys_url).read()

    # Open the tournament itself; the response body is read and discarded.
    print("looking at the tourney")
    opener.open(view_tourney_url).read()

    # Load the play page, as a real player would before scoring.
    print("playing the game")
    game_page = HTML(opener.open(play_tourney_game_url).read())

    if game_name is None:
        # Recover the name from the Flash embed's "gamename=" variable.
        embed = game_page.find(".//embed[@type='application/x-shockwave-flash']")
        if embed is None:
            print("didn't find the flash plugin on the game page :'-(")
            return

        prefix = "gamename="
        candidates = [
            pair[len(prefix):]
            for pair in embed.attrib['flashvars'].split("&")
            if pair.startswith(prefix)
        ]
        if candidates:
            # If the variable occurs more than once, the last one wins.
            game_name = candidates[-1]

    if game_name is None:
        print("game name not found :'-(")
        return

    # Pause for the requested duration before reporting the score.
    print("waiting")
    sleep(time)

    post_data = _utils.encode_post_data({"gscore": score, "gname": game_name})
    print("submitting fake score")
    opener.open(score_url, data=post_data).read()

    print("done")
#!/usr/bin/env python3

import re
import requests
from lxml.etree import HTML

# Download and parse the Debian stable release page.
response = requests.get('http://www.debian.org/releases/stable/')
root = HTML(response.content)

# The codename is the text between the curly quotes in the <title>.
head_element = root.find('head')
title_text = head_element.find('title').text
codename_match = re.search('\u201c(.*)\u201d', title_text)
release = codename_match.group(1)

# The version is the second whitespace-separated word of the first <p>
# inside <div id="content">.
first_paragraph = root.xpath('//div[@id="content"]/p[1]')[0]
p_text = first_paragraph.text
version = p_text.split()[1]

summary = 'Codename: {}\nVersion: {}'.format(release, version)
print(summary)
Ejemplo n.º 9
0
import re
import requests
from lxml.etree import HTML

# Download and parse the Debian stable release page.
response = requests.get("http://www.debian.org/releases/stable/")
root = HTML(response.content)

# The codename is the text between the curly quotes in the <title>.
title_text = root.find("head").find("title").text
release = re.search("\u201c(.*)\u201d", title_text).group(1)

# The version is the second whitespace-separated word of the first <p>
# inside <div id="content">.
p_text = root.xpath("//div[@id='content']/p[1]")[0].text
version = p_text.split()[1]

# Both labels are followed by a single space; the original omitted it
# after "Codename:", which made the two output lines inconsistent.
print("Codename: {}\nVersion: {}".format(release, version))