Ejemplo n.º 1
0
# Scrape the Malt Worm "on tap today" page and hand the beer table to
# beerlib.parser_output.
#
# Exit statuses: -1 when nothing was downloaded, -2 when the page contains
# no <body> element.
html = beerlib.download_html('https://maltworm.cz/dnes-na-cepu/')
if not html:
    sys.exit(-1)

# Isolate the <body> element so that it can be fed to the XML parser.
reg = re.compile(r'(<body.*</body>)', re.MULTILINE | re.DOTALL)
body_match = reg.search(html)
if not body_match:
    # search() returns None when there is no match; exit with a distinct
    # status instead of failing with AttributeError on .group().
    sys.exit(-2)
body = body_match.group(0)

# Drop inline scripts before XML parsing.
# NOTE: the pattern is greedy, so everything from the first "<script" to the
# last "</script>" is removed, including any markup in between. It is kept
# as-is because narrowing it would expose more of the page to the XML parser.
content = re.sub(r'<script.*</script>',
                 '',
                 body,
                 flags=re.MULTILINE | re.DOTALL)

table = ET.XML(content)
articles = table.findall(".//article")

headers = ['Pivo', 'Typ', 'EPM', 'Alk.', 'IBU', 'Pivovar', 'Město']
# Column index of the IBU value; constant for every article.
ibu_pos = headers.index('IBU')

output = []
for article in articles:
    beer = article.find(
        ".//p[@class='elementor-heading-title elementor-size-default']")
    if beer is None:
        # An <article> without the beer heading cannot produce a row.
        continue

    info = article.findall(".//span[@class='elementor-icon-list-text']")
    # First column is the heading text, the rest is the full text content of
    # each icon-list span (including text of nested elements).
    values = [beer.text] + ["".join(i.itertext()) for i in info]

    # get rid of 'IBU:' prefix (only when the article has that many fields)
    if len(values) > ibu_pos:
        values[ibu_pos] = values[ibu_pos].replace('IBU: ', '')

    output.append(values)

beerlib.parser_output(output, headers, 'Malt Worm', sys.argv)
Ejemplo n.º 2
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from xml.etree import ElementTree as ET
import re, sys
import common as beerlib

# Scrape the "currently on tap" table of Ochutnávková pivnice and pass it to
# beerlib.parser_output.
#
# Exit statuses: -1 when nothing was downloaded, -2 when no <table> was found.
page = beerlib.download_html('http://ochutnavkovapivnice.cz/prave_na_cepu/')
if not page:
    exit(-1)

# Grab everything from the first "<table" to the last "</table>".
found = re.search('(<table.*</table>)', page, re.MULTILINE | re.DOTALL)
if not found:
    exit(-2)

# Replace every ampersand with a numeric character reference before the
# markup is handed to the XML parser.
markup = found.group(0).replace('&', '&#038;')
table = ET.XML(markup)

# The first child of the table is the header row; each header text is read
# from the first child element of its cell.
row_iter = iter(table)
headers = [cell[0].text for cell in next(row_iter)]

output = []
for row in row_iter:
    cells = iter(row)
    # The beer name sits two elements deep inside the first cell; the
    # remaining cells carry their text directly.
    name = next(cells)[0][0].text
    output.append([name] + [cell.text for cell in cells])

beerlib.parser_output(output, headers, 'Ochutnávková pivnice', sys.argv)
Ejemplo n.º 3
0
	# Extract candidate paragraphs from the post markup.
	# NOTE(review): `reg` and `post_html` are defined before this excerpt;
	# presumably `reg` matches whole paragraph elements -- confirm.
	paragraphs = reg.findall(post_html)

	# Hope that some paragraph of post contains beers
	for p in paragraphs:
		# Parse the paragraph as XML; ET.XML raises on malformed input rather
		# than returning None.
		beers = ET.XML(p)

		# Nothing? Give up
		# NOTE(review): the truth value of an Element reflects whether it has
		# child elements, so this skips paragraphs without child elements
		# (and newer Python versions warn about testing it) -- confirm that
		# this is the intended check.
		if not beers:
			continue

		# Flatten the paragraph into its text fragments, one candidate line each.
		beers = list(beers.itertext())

		# Hope that the beer list format is the same
		headers = ['Pivo', 'Alk.', 'Pivovar', 'Typ']
		output = []
		for line in beers:
			# NOTE(review): the patterns below are not raw strings, so `\.`,
			# `\(` and `\)` are invalid string escapes that newer Python
			# versions warn about -- consider r'...' literals.
			# Full form with four groups (name, alcohol, and the two
			# comma-separated parts inside the parentheses), e.g.:
			# Black Label #4 8,1% (Raven, Wild Ale)
			m = re.match(' *(.+?)(?: -)? +([0-9,\.]+%) +\(([^,]+), ?([^\)]+)\)?', line)
			if not m:
				# Short form without the parenthesised part; the two empty
				# groups keep the row at four columns, e.g.:
				# Zlaté Prasátko 6,5%
				m = re.match(' *(.+?)(?: -)? +([0-9,\.]+%)()()', line)
			if m:
				output = output + [list(m.groups())]

		# The first paragraph that yields at least one beer is printed and the
		# script ends with status 0.
		if output:
			beerlib.parser_output(output, headers, 'Craftbeer bottle shop & bar', sys.argv)
			exit(0)

# nothing was found
exit(1)