예제 #1
0
def get_game_data(username):
    """Fetch the games listed on a Steam community profile page.

    Downloads ``http://steamcommunity.com/id/<username>/games/?tab=all``,
    runs ``parse_script`` over the text of every
    ``script[language=javascript]`` element and looks for a variable whose
    identifier is ``rgGames``.

    Args:
        username: Value interpolated into the profile URL as the Steam id.

    Returns:
        A list holding ``to_map(item)`` for every item of the ``rgGames``
        initializer. An empty list is returned when no script on the page
        declares ``rgGames`` (previously this case failed with an
        ``AttributeError`` on ``None``).
    """
    wishlist_url = 'http://steamcommunity.com/id/%s/games/?tab=all' % (username,)

    # Use the response as a context manager so the connection is always
    # closed, even if reading or decoding fails.
    with urllib.request.urlopen(wishlist_url) as response:
        html_data = response.read().decode('utf-8')

    doc = html.document_fromstring(html_data)
    row_selector = HTMLTranslator().css_to_xpath('script[language=javascript]')

    games = None
    for el in doc.xpath(row_selector):
        for variable in parse_script(el.text_content()):
            if variable.identifier.value == 'rgGames':
                # No early exit on purpose: as before, the last matching
                # declaration on the page wins.
                games = variable

    if games is None:
        return []
    return [to_map(item) for item in games.initializer.items]
예제 #2
0
# Look a word up on etymonline and collect the result as fold-marked lines
# for a scratch window named "etymology".
base_url = "http://etymonline.com/index.php?term={}"
etymologynr = int(vim.eval("bufwinnr('^etymology$')"))
word_to_look_up = sys.argv[0]
# str.format template: the doubled braces render as a literal "{{{" after
# the term name; term_end is appended verbatim. Together they bracket each
# entry for the marker-based folding configured below.
term_start = "{} {{{{{{"
term_end = "}}}"

# Any window number above -1 is treated as "the etymology window already
# exists"; otherwise a new split is opened for it.
if etymologynr <= -1:
    vim.command('silent keepalt belowright split etymology')
else:
    vim.command('{}wincmd w'.format(etymologynr))

# Buffer-local options for the scratch window, applied in order.
for option_cmd in (
        'setlocal noswapfile nobuflisted nospell nowrap modifiable',
        'setlocal buftype=nofile bufhidden=hide',
        'setlocal foldmethod=marker textwidth=80 wrapmargin=0',
):
    vim.command(option_cmd)

# Compiled XPath matchers derived from CSS selectors.
term_xpath = etree.XPath(htmltrans.css_to_xpath('dt'))
linkfixes = etree.XPath(htmltrans.css_to_xpath("a.crossreference"))
foreignfixes = etree.XPath(htmltrans.css_to_xpath("span.foreign"))

lookup_url = base_url.format(uparse.quote_plus(word_to_look_up))
definitions = html.parse(lookup_url)
lines = []

# Wrap the leading text of foreign-word spans in <<...>> and of
# cross-reference links in >>...<<, in that order.
for matcher, wrapper in ((foreignfixes, "<<{}>>"), (linkfixes, ">>{}<<")):
    for node in matcher(definitions):
        node.text = wrapper.format(node.text)

# Each <dt> contributes a fold header built from its first child's text,
# the text of the element that follows it split on CRLF, and a fold footer.
for term_el in term_xpath(definitions):
    lines.append(term_start.format(term_el[0].text))
    lines += term_el.getnext().text_content().split('\r\n')
    lines += [term_end]
예제 #3
0
import lxml.html
from cssselect import HTMLTranslator
import re
from scraper.models import FilmDict
from scraper.utils import (decode_html, unicode_normalize,
                           clean_string, string_to_list,
                           correct_countries_list)
from cinema.utils import (titlecase, country_title)

# Translator used to turn the CSS selectors below into XPath strings.
html_translator = HTMLTranslator()
# XPath equivalent of the CSS selector 'header.carousel-caption > h6'.
META_XPATH = html_translator.css_to_xpath('header.carousel-caption > h6')
# XPath equivalent of a CSS selector for the first <a> child (by type) of a
# .thumbnail element located under an <li> that is a child of ul.thumbnails.
ANCHOR_XPATH = html_translator.css_to_xpath('ul.thumbnails > li .thumbnail > a:nth-of-type(1)')
# Hand-written XPath: <p> children of any <div> whose class is exactly 'lead'.
SYNOPSIS_GRAPHS_XPATH = "//div[@class='lead']/p"
# Hand-written XPath: <p> siblings that follow the second <h4> child of an
# <article>.
DESCRIPTION_GRAPHS_XPATH = '//article/h4[2]/following-sibling::p'
# Regex: literal "dir." followed by whitespace, then captures a run of
# non-digit characters.
DIRECTOR_REG = r'dir\.\s+([^\d]+)'
# Regex: a comma and whitespace, then one or more captured runs that start
# with a word character and continue with word characters, apostrophes or
# whitespace. NOTE(review): presumably matches a comma-separated list of
# country names -- confirm against the code that uses it.
COUNTRIES_REG = r'(?:\,\s+(\w[\'\w\s]+)+)'


class HTMLScraper:
    """Hold raw HTML plus its source URL and expose a lazily built tree.

    The tree is produced on first access to ``tree`` by calling
    ``self.make_tree()``. That method is not defined in this part of the
    class, so it has to be supplied elsewhere (for example by a subclass).
    """

    def __init__(self, raw_html, source_url=None):
        """Store the inputs; nothing is parsed until ``tree`` is read."""
        super().__init__()
        self.raw_html = raw_html
        self.source_url = source_url
        self._tree = None  # cache filled in by the ``tree`` property

    @property
    def tree(self):
        """Return the cached tree, building it via ``make_tree()`` if unset."""
        cached = self._tree
        if cached is not None:
            return cached
        self._tree = built = self.make_tree()
        return built
예제 #4
0
# Look a word up on etymonline and collect the result as fold-marked lines
# for a scratch window named "etymology".
base_url = "http://etymonline.com/index.php?term={}"
etymologynr = int(vim.eval("bufwinnr('^etymology$')"))
word_to_look_up = sys.argv[0]
# Fold header template (renders as "<term> {{{") and the matching footer.
term_start = "{} {{{{{{"
term_end = "}}}"

# Jump to the etymology window when its number is above -1, otherwise open
# a new split for it.
window_cmd = (
    '{}wincmd w'.format(etymologynr)
    if etymologynr > -1
    else 'silent keepalt belowright split etymology'
)
vim.command(window_cmd)

# Buffer-local options for the scratch window.
vim.command('setlocal noswapfile nobuflisted nospell nowrap modifiable')
vim.command('setlocal buftype=nofile bufhidden=hide')
vim.command('setlocal foldmethod=marker textwidth=80 wrapmargin=0')

# Compiled XPath matchers derived from CSS selectors.
term_xpath = etree.XPath(htmltrans.css_to_xpath('dt'))
linkfixes = etree.XPath(htmltrans.css_to_xpath("a.crossreference"))
foreignfixes = etree.XPath(htmltrans.css_to_xpath("span.foreign"))

quoted_word = uparse.quote_plus(word_to_look_up)
definitions = html.parse(base_url.format(quoted_word))
lines = []

# Mark foreign words as <<...>> and cross references as >>...<<.
for foreign_el in foreignfixes(definitions):
    foreign_el.text = "<<{}>>".format(foreign_el.text)
for link_el in linkfixes(definitions):
    link_el.text = ">>{}<<".format(link_el.text)

# Each <dt> contributes a fold header built from its first child's text,
# the text of the element that follows it split on CRLF, and a fold footer.
for term_el in term_xpath(definitions):
    term_name = term_el[0].text
    lines.append(term_start.format(term_name))
    definition_el = term_el.getnext()
    lines.extend(definition_el.text_content().split('\r\n'))
    lines.append(term_end)
예제 #5
0
파일: enact.py 프로젝트: ohpauleez/enact
 def cssToXpath(css_selector, translator=None):
     """Translate a CSS selector into an XPath expression.

     Uses ``translator.css_to_xpath``; when ``translator`` is missing or
     falsy, a fresh ``HTMLTranslator`` is created for the call.
     """
     active_translator = translator or HTMLTranslator()
     return active_translator.css_to_xpath(css_selector)