def parse_url(url, today=False):
    """Parse the canteen plan at *url* into an OpenMensa XML feed.

    Handles both the legacy ``div.day`` layout and the newer
    ``article[data-day]`` layout.  *today* is accepted for interface
    compatibility but not used here.
    """
    canteen = LazyBuilder()
    # Pass the parser explicitly, consistent with the other parsers in
    # this module (avoids bs4's "no parser specified" warning and
    # parser-dependent output).
    document = parse(urlopen(url).read(), 'lxml')
    for day_div in document.find_all('div', 'day') + document.find_all(
            'article', attrs={'data-day': True}):
        # parse date, warning: calculate year number needed
        date_test = day_regex.search(day_div['data-day'])
        if not date_test:
            print('Error: unable to parse date')
            continue
        else:
            year = datetime.datetime.now().year
            if datetime.datetime.now().month > int(date_test.group('month')):
                year += 1  # date from next year
            date = "{}-{}-{}".format(
                year,
                date_test.group('month'),
                date_test.group('day'),
            )
        if 'nodata' in day_div.attrs.get('class', []) \
                or 'GESCHLOSSEN' in day_div.text:
            canteen.setDayClosed(date)
            continue
        closed_candidate = False
        for meal_article in day_div.find_all('article', 'menu'):
            name = meal_article.find('div', 'title').text
            if not name:
                continue
            if 'geschlossen' in name:
                closed_candidate = True
                continue
            category = meal_article.find('div')['title']
            notes = [
                v['title'] for v in meal_article.find_all('div', 'theicon')
                if v['title']
            ]
            if meal_article.find('div', 'additive'):
                notes += [
                    v[0] for v in extra_regex.findall(
                        meal_article.find('div', 'additive').text)
                ]
            price_div = meal_article.find('div', 'price')
            if price_div is None:
                canteen.addMeal(date, category, name, notes)
                continue
            prices = {}
            for v, r in (('default', 'student'), ('bed', 'employee'),
                         ('guest', 'other')):
                price = price_regex.search(price_div['data-' + v])
                if price:
                    prices[r] = price.group('price')
                elif v == 'default':
                    # Without a student price the other roles are dropped too.
                    prices = {}
                    break
            canteen.addMeal(date, category, name, notes, prices)
        if closed_candidate and not canteen.hasMealsFor(date):
            canteen.setDayClosed(date)
    return canteen.toXMLFeed()
def parse_url(url, today=False):
    """Build an OpenMensa feed from the weekly table layout at *url*.

    A numeric legend ("legende" block) is resolved into note texts; rows
    mentioning "geschlossen"/"Feiertage" close a single day or a whole
    announced date range.
    """
    raw = urlopen(url).read()
    document = parse(raw, 'lxml')

    # Resolve the numeric footnote legend, if the page carries one.
    # reversed() makes the first occurrence of a duplicate number win.
    legend_blocks = document.find_all('div', {'class': 'legende'})
    if legend_blocks:
        legend_map = {
            int(entry[0]): entry[1]
            for entry in reversed(legend_regex.findall(legend_blocks[0].text))
        }
    else:
        legend_map = {}

    canteen = LazyBuilder()
    for day_cell in document.find_all('td', text=day_regex):
        date = day_regex.search(day_cell.string).group('date')

        # Walk up to the table enclosing this day header.
        table = next((ancestor for ancestor in day_cell.parents
                      if ancestor.name == 'table'), None)
        if not table:
            continue

        for row in table.tbody.find_all('tr'):
            if 'geschlossen' in row.text or 'Feiertage' in row.text:
                span = day_range_regex.search(row.text)
                if not span:
                    canteen.setDayClosed(date)
                else:
                    # Close every day inside the announced range.
                    cursor = datetime.datetime.strptime(
                        span.group('from'), '%d.%m.%Y')
                    last = datetime.datetime.strptime(
                        span.group('to'), '%d.%m.%Y')
                    while cursor <= last:
                        canteen.setDayClosed(cursor.strftime('%Y-%m-%d'))
                        cursor += datetime.date.resolution
                continue

            if len(row) != 2:
                continue  # no meal

            texts = list(row.contents[0].strings)
            name = texts[0]

            # prices: a bare '-' anywhere means no price is published
            prices = texts[-1].split('|')
            if '-' in (entry.strip() for entry in prices):
                prices = {}

            # notes: icon alt texts plus resolved legend references
            notes = [img['alt'].replace('Symbol', '').strip()
                     for img in row.contents[1].find_all('img')]
            for ref in set(int(n) for n in extra_regex.findall(row.text)):
                if ref in legend_map:
                    notes.append(legend_map[ref])

            canteen.addMeal(date, 'Hauptgerichte', name, notes, prices,
                            roles if prices else None)
    return canteen.toXMLFeed()
def parsePlan(url, internalMensaId, today):
    """Parse day plans for one canteen, following "next day" links.

    Stops after the first page when *today* is truthy, or when no
    further page link exists.  Returns the OpenMensa XML feed.
    """
    canteen = LazyBuilder()
    while url is not None:
        dom = BeautifulSoup(urlopen(url).read(), 'lxml')
        date = dom.select('#mensa_date > p')[0].contents[0]
        menuDefinition = dom.find(id=internalMensaId)
        menuDescription = menuDefinition.parent.find('dd')
        tables = menuDescription.select('table')
        legend = {}
        # Raw string: the pattern contains \w and \s, which are invalid
        # escape sequences in a plain string literal.
        legend = buildLegend(
            legend, str(dom),
            regex=r'<strong>(?P<name>\w+)\s*</strong>\s*-\s*(?P<value>[\w\s)(]+)')
        if tables is not None and len(tables) == 1:
            table = tables[0]
            for row in table.find_all('tr'):
                menuNameElement = row.select('td[class="mensa_col_55"] > b')
                if menuNameElement is not None \
                        and menuNameElement[0].contents is not None:
                    menuName = menuNameElement[0].contents[0]
                    category = 'Gericht'
                    # get notes
                    notes = {}
                    notesElement = row.select(
                        'td[class="mensa_col_55"] > span')
                    if notesElement is not None and len(notesElement) > 0 \
                            and notesElement[0].text is not None:
                        notes = [legend.get(n, n)
                                 for n in notesElement[0].text.split(' ')
                                 if n]
                    # get prices
                    prices = {}
                    for td in row.select('td[class="mensa_col_15"]'):
                        priceElement = td.find('b')
                        groupElement = td.find('span')
                        if priceElement is not None \
                                and groupElement is not None \
                                and groupElement.contents is not None \
                                and len(groupElement.contents) > 0 \
                                and priceElement.contents is not None \
                                and len(priceElement.contents) > 0:
                            group = str(groupElement.contents[0])
                            price = str(priceElement.contents[0])
                            if group == 'Stud.:':
                                prices['student'] = price
                            elif group == 'Bed.:':
                                prices['employee'] = price
                            elif group == 'Gast:':
                                prices['other'] = price
                    canteen.addMeal(date, category, menuName, notes, prices)
        else:
            canteen.setDayClosed(date)
        # check for further pages
        nextPageLink = dom.find(id='next_day_link')
        if nextPageLink is None or today:
            url = None
        else:
            url = 'https://www.studentenwerk-rostock.de/' \
                + nextPageLink['href']
    return canteen.toXMLFeed()
def parse_url(url, today=False):
    """Scrape the day-based plan layout (``div[data-day]``) into a feed."""
    canteen = LazyBuilder()
    document = parse(urlopen(url).read(), 'lxml')

    for day_node in document.find_all('div', attrs={'data-day': True}):
        # parse date, warning: calculate year number needed
        match = day_regex.search(day_node['data-day'])
        if not match:
            print('Error: unable to parse date "{}"'.format(
                day_node['data-day']))
            continue
        now = datetime.datetime.now()
        year = now.year
        if now.month > int(match.group('month')):
            year += 1  # date from next year
        date = '{}-{}-{}'.format(year, match.group('month'),
                                 match.group('day'))

        closed_candidate = day_node.find('div', 'holiday') is not None

        for article in day_node.find_all('article', 'menu'):
            name = article.find('div', 'title').text
            if not name:
                continue
            category = article.find('div', 'icon')['title']

            notes = []
            additive_list = article.find('div', 'additnr')
            if additive_list:
                for item in additive_list.find_all('li'):
                    notes.append(item.text)
            notes += [v['title']
                      for v in article.find_all('div', 'theicon')
                      if v['title'] and v['title'] not in notes]

            prices = {}
            price_node = article.find('div', 'price')
            if price_node:
                for attr, role in price_map.items():
                    value = price_node['data-' + attr]
                    if value:
                        prices[role] = value

            canteen.addMeal(date, category, name, notes, prices)

        if closed_candidate and not canteen.hasMealsFor(date):
            canteen.setDayClosed(date)
    return canteen.toXMLFeed()
def parse_url(url, today=False):
    """Build an OpenMensa feed from the weekly table layout at *url*.

    This variant expects three-cell meal rows.  Rows mentioning
    "geschlossen"/"Feiertage" close a day or a whole date range.
    """
    content = urlopen(url).read()
    # Explicit parser, consistent with the other parsers in this module.
    document = parse(content, 'lxml')
    legends = document.find_all('div', {'class': 'legende'})
    if len(legends) > 0:
        extraLegend = {int(v[0]): v[1] for v in reversed(
            legend_regex.findall(legends[0].text))}
    else:
        extraLegend = {}
    canteen = LazyBuilder()
    for day_td in document.find_all('td', text=day_regex):
        date = day_regex.search(day_td.string).group('date')
        table = None
        for element in day_td.parents:
            if element.name == 'table':
                table = element
                break
        if not table:
            continue
        for tr in table.tbody.find_all('tr'):
            if 'geschlossen' in tr.text or 'Feiertage' in tr.text:
                match = day_range_regex.search(tr.text)
                if not match:
                    canteen.setDayClosed(date)
                else:
                    fromDate = datetime.datetime.strptime(
                        match.group('from'), '%d.%m.%Y')
                    toDate = datetime.datetime.strptime(
                        match.group('to'), '%d.%m.%Y')
                    while fromDate <= toDate:
                        canteen.setDayClosed(fromDate.strftime('%Y-%m-%d'))
                        fromDate += datetime.date.resolution
                continue
            if len(tr) != 3:
                continue  # no meal
            strings = list(tr.contents[0].strings)
            name = strings[0]
            # prices (a '-' entry means no price is published):
            prices = strings[-1].split('|')
            if '-' in map(lambda v: v.strip(), prices):
                prices = {}
            # notes:
            notes = []
            for img in tr.contents[1].find_all('img'):
                notes.append(img['alt'].replace('Symbol', '').strip())
            for extra in list(set(map(lambda v: int(v),
                                      extra_regex.findall(tr.text)))):
                if extra in extraLegend:
                    notes.append(extraLegend[extra])
            canteen.addMeal(date, 'Hauptgerichte', name, notes, prices,
                            roles if prices else None)
    return canteen.toXMLFeed()
def parse_url(url, today=False):
    """Parse a plan that mixes the legacy ``div.day`` and the newer
    ``article[data-day]`` layouts into an OpenMensa feed.
    """
    builder = LazyBuilder()
    document = parse(urlopen(url).read())
    day_nodes = (document.find_all('div', 'day')
                 + document.find_all('article', attrs={'data-day': True}))
    for day_node in day_nodes:
        # parse date, warning: calculate year number needed
        found = day_regex.search(day_node['data-day'])
        if not found:
            print('Error: unable to parse date')
            continue
        now = datetime.datetime.now()
        year = now.year + (1 if now.month > int(found.group('month')) else 0)
        date = "{}-{}-{}".format(year, found.group('month'),
                                 found.group('day'))
        if 'nodata' in day_node.attrs.get('class', []) \
                or 'GESCHLOSSEN' in day_node.text:
            builder.setDayClosed(date)
            continue
        closed_candidate = False
        for article in day_node.find_all('article', 'menu'):
            name = article.find('div', 'title').text
            if not name:
                continue
            if 'geschlossen' in name:
                closed_candidate = True
                continue
            category = article.find('div', 'desc').text
            notes = [icon['title']
                     for icon in article.find_all('div', 'theicon')
                     if icon['title']]
            additive = article.find('div', 'additive')
            if additive:
                notes += [hit[0] for hit in extra_regex.findall(additive.text)]
            price_node = article.find('div', 'price')
            if price_node is None:
                builder.addMeal(date, category, name, notes)
                continue
            prices = {}
            for attr, role in (('default', 'student'), ('bed', 'employee'),
                               ('guest', 'other')):
                hit = price_regex.search(price_node['data-' + attr])
                if hit:
                    prices[role] = hit.group('price')
                elif attr == 'default':
                    # No student price: drop the other roles as well.
                    prices = {}
                    break
            builder.addMeal(date, category, name, notes, prices)
        if closed_candidate and not builder.hasMealsFor(date):
            builder.setDayClosed(date)
    return builder.toXMLFeed()
def parse_url(url, today=False):
    """Read the ``div[data-day]`` plan at *url* and emit an OpenMensa feed."""
    canteen = LazyBuilder()
    soup = parse(urlopen(url).read(), 'lxml')
    for day in soup.find_all('div', attrs={'data-day': True}):
        # parse date, warning: calculate year number needed
        date_test = day_regex.search(day['data-day'])
        if not date_test:
            print('Error: unable to parse date "{}"'.format(day['data-day']))
            continue
        year = datetime.datetime.now().year
        # A month already behind us belongs to next year's plan.
        if datetime.datetime.now().month > int(date_test.group('month')):
            year += 1
        date = '{}-{}-{}'.format(year, date_test.group('month'),
                                 date_test.group('day'))

        closed_candidate = day.find('div', 'holiday') is not None

        for menu in day.find_all('article', 'menu'):
            name = menu.find('div', 'title').text
            if not name:
                continue
            category = menu.find('div', 'icon')['title']
            notes, prices = [], {}

            additnr = menu.find('div', 'additnr')
            if additnr:
                notes += [li.text for li in additnr.find_all('li')]
            notes += [icon['title']
                      for icon in menu.find_all('div', 'theicon')
                      if icon['title'] and icon['title'] not in notes]

            price_node = menu.find('div', 'price')
            if price_node:
                for key in price_map:
                    amount = price_node['data-' + key]
                    if amount:
                        prices[price_map[key]] = amount

            canteen.addMeal(date, category, name, notes, prices)

        if closed_candidate and not canteen.hasMealsFor(date):
            canteen.setDayClosed(date)
    return canteen.toXMLFeed()
def parse_url(url, today=False):
    """Parse the table-based plan at *url* into an OpenMensa XML feed.

    The table mixes date rows, meal rows and "Aktion" rows; a small
    state machine tracks whether we are inside a valid day entry.
    """
    content = urlopen(url).read()
    document = parse(content, "lxml")
    canteen = LazyBuilder()
    table = document.find_all('table')[0]

    def debug_print(food_type, food_description, priceing):
        # Development helper: dump one parsed entry to stdout.
        if (priceing is None):
            print(date + ': ' + food_type + ": " + food_description)
        else:
            print(date + ': ' + food_type + ": " + food_description + " : ",
                  end='')
            for e in priceing:
                print(e, end=' ')
            print()

    def is_new_entry(tds):
        # A new day starts with a cell whose text contains a date.
        td = tds[0]
        return td.string is not None and date_regex.search(
            td.string) is not None

    def is_end_of_entry(tds):
        # A row consisting only of empty cells ends the current day.
        for td in tds:
            if (td.string is None or td.string.strip() != ''):
                return False
        return True

    def is_action_entry(td):
        return td.text == 'Aktion'

    def is_closed(tds):
        return is_new_entry(tds) and get_pricing(tds, 4, 7) is None

    def refactor_date(raw_date):
        # Re-attach a year to a day/month string, handling the year
        # boundary in both directions.
        now = datetime.datetime.now()
        day = date_regex.search(raw_date).group('day')
        month = date_regex.search(raw_date).group('month')
        year = now.year
        if month == '01' and now.month == 12:
            # if list depicts meals from this and the next year
            year += 1
        elif month == '12' and now.month == 1:
            # if list depicts meals from this and the last year
            year -= 1
        return day + '.' + month + '.' + str(year)

    def parse_foot_type(td):
        type = ''
        if td.string is None:
            # Guard against cells with neither text nor an image:
            # find_all(...)[0] on an empty result would raise IndexError
            # and abort the whole row (the sibling parser already has
            # this fix).
            images = td.find_all('img')
            if not images:
                return None
            src = images[0].get('src')
            if ('msc' in src):
                type += 'Fish MSC '
            elif ('vegan' in src):
                type += 'Vegan '
        # Sometimes uncategorized food is possible, therefore we need to
        # cover this, otherwise openmensa.org will fail due to an empty tag.
        elif (td.string.strip() == ''):
            type += 'Tipp '
        else:
            if ('R' in td.string):
                type += 'Rind '
            if ('S' in td.string):
                type += 'Schwein '
            if ('G' in td.string):
                type += 'Geflügel '
            if ('V' in td.string):
                type += 'Vegetarisch '
            if ('F' in td.string):
                type += 'Fisch '
            if ('L' in td.string):
                type += 'Lamm '
            if ('W' in td.string):
                type += 'Wild '
        # Drop the trailing space.
        tl = list(type)[:-1]
        return ''.join(tl)

    def get_refs(td):
        return td.find_all('sup')

    def get_foot_description(td):
        # Text of the cell with all footnote markers removed.
        refl = get_refs(td)
        description = td.text
        for ref in refl:
            description = description.replace(' ' + ref.text, '', 1)
        if description[0] == ' ':
            description = description.replace(' ', '', 1)
        return description

    def get_notes(td):
        # Collect the de-duplicated footnote markers of a cell.
        refl = get_refs(td)
        strl = []
        for ref in refl:
            strl.extend(ref.string.split(','))
        strl = list(set(strl))
        return strl

    def build_notes_string(td):
        refs = get_notes(td)
        food_is = ''
        food_contains = ''
        for r in refs:
            # parse "food is" footnotes
            if r == '1':
                food_is += 'mit Farbstoffen, '
            elif r == '4':
                food_is += 'geschwärzt, '
            elif r == '7':
                food_is += 'mit Antioxidationsmittel, '
            elif r == '8':
                food_is += 'mit Geschmacksverstärker, '
            elif r == '9':
                food_is += 'geschwefelt, '
            elif r == '10':
                food_is += 'geschwärzt, '
            elif r == '11':
                food_is += 'gewachst, '
            elif r == '12':
                food_is += 'mit Phosphat, '
            elif r == '5':
                food_is += 'mit Süßungsmittel, '
            # parse allergen footnotes
            elif r == 'a1':
                food_contains += 'Gluten, '
            elif r == 'a2':
                food_contains += 'Krebstiere, '
            elif r == 'a3':
                food_contains += 'Eier, '
            elif r == 'a4':
                food_contains += 'Fisch, '
            elif r == 'a5':
                food_contains += 'Erdnüsse, '
            elif r == 'a6':
                food_contains += 'Soja, '
            elif r == 'a7':
                food_contains += 'Milch/Laktose, '
            elif r == 'a8':
                food_contains += 'Schalenfrüchte, '
            elif r == 'a9':
                food_contains += 'Sellerie, '
            elif r == 'a10':
                food_contains += 'Senf, '
            elif r == 'a11':
                food_contains += 'Sesam, '
            elif r == 'a12':
                food_contains += 'Schwefeldioxid/Sulfite, '
            elif r == 'a13':
                food_contains += 'Lupinen, '
            elif r == 'a14':
                food_contains += 'Weichtiere, '
            else:
                food_contains += 'undefinierte Chemikalien:' + r + ', '
        notes = ''
        if food_is != '':
            notes += 'Gericht ist ' + food_is
        if food_contains != '':
            if food_is == '':
                notes += 'Gericht enthält '
            else:
                notes += 'und enthält '
            notes += food_contains
        if notes != '':
            # Replace the trailing ", " with a period.
            nl = list(notes)
            del nl[len(nl) - 1]
            nl[len(nl) - 1] = '.'
            notes = ''.join(nl)
        return notes

    def get_pricing(tds, f, t):
        priceing = []
        # Sometimes we don't get 7 cells; then this might be a special day.
        if len(tds) < 7:
            return None
        for i in range(f, t):
            raw_price = tds[i].string.strip()
            if raw_price == '':
                return None
            else:
                priceing.append(price_regex.search(raw_price).group('val'))
        return priceing

    # state helper
    inside_valide_entry = False
    date = ''
    for tr in table.find_all('tr'):
        tds = tr.find_all('td')
        if (is_new_entry(tds)):
            try:
                raw_date = tds[0].string
                date = refactor_date(raw_date)
                if (is_closed(tds)):
                    # Sometimes a canteen might look closed but actually
                    # it's Spargeltage.
                    if "Spargeltage" in tds[3].text:
                        canteen.addMeal(date, "Spargel", "Spargel Tag",
                                        ["Spargel und andere Gerichte."],
                                        None, None)
                    else:
                        canteen.setDayClosed(date)
                else:
                    inside_valide_entry = True
            except Exception:
                traceback.print_exception(*sys.exc_info())
        if (is_end_of_entry(tds)):
            inside_valide_entry = False
        elif inside_valide_entry:
            try:
                notes = []
                if is_action_entry(tds[0]):
                    food_type = parse_foot_type(tds[1])
                    food_description = get_foot_description(tds[2])
                    notes_string = build_notes_string(tds[2])
                    if (notes_string != ""):
                        notes.append(notes_string)
                    prices = get_pricing(tds, 3, 6)
                    # Skip uncategorizable rows instead of crashing on
                    # 'Aktion: ' + None.
                    if food_type is not None:
                        canteen.addMeal(date, 'Aktion: ' + food_type,
                                        food_description, notes, prices,
                                        roles if prices else None)
                else:
                    food_type = parse_foot_type(tds[2])
                    food_description = get_foot_description(tds[3])
                    notes_string = build_notes_string(tds[3])
                    if (notes_string != ""):
                        notes.append(notes_string)
                    prices = get_pricing(tds, 4, 7)
                    if food_type is not None:
                        canteen.addMeal(date, food_type, food_description,
                                        notes, prices,
                                        roles if prices else None)
            except Exception:
                traceback.print_exception(*sys.exc_info())
    return canteen.toXMLFeed()
def parse_url(url, today=False):
    """Parse the table-based canteen plan at *url* into an OpenMensa feed.

    Rows are handled with a small state machine: a date cell opens a day,
    an all-empty row closes it, and "Aktion" rows are promotional meals.
    """
    content = urlopen(url).read()
    document = parse(content, "lxml")
    canteen = LazyBuilder()
    table = document.find_all('table')[0]

    # Footnote codes describing what a dish *is* ...
    _IS_NOTES = {
        '1': 'mit Farbstoffen, ',
        '4': 'geschwärzt, ',
        '5': 'mit Süßungsmittel, ',
        '7': 'mit Antioxidationsmittel, ',
        '8': 'mit Geschmacksverstärker, ',
        '9': 'geschwefelt, ',
        '10': 'geschwärzt, ',
        '11': 'gewachst, ',
        '12': 'mit Phosphat, ',
    }
    # ... and allergen codes describing what it *contains*.
    _CONTAINS_NOTES = {
        'a1': 'Gluten, ',
        'a2': 'Krebstiere, ',
        'a3': 'Eier, ',
        'a4': 'Fisch, ',
        'a5': 'Erdnüsse, ',
        'a6': 'Soja, ',
        'a7': 'Milch/Laktose, ',
        'a8': 'Schalenfrüchte, ',
        'a9': 'Sellerie, ',
        'a10': 'Senf, ',
        'a11': 'Sesam, ',
        'a12': 'Schwefeldioxid/Sulfite, ',
        'a13': 'Lupinen, ',
        'a14': 'Weichtiere, ',
    }
    # Letter codes of the type column, in display order.
    _TYPE_LETTERS = (
        ('R', 'Rind '),
        ('S', 'Schwein '),
        ('G', 'Geflügel '),
        ('V', 'Vegetarisch '),
        ('F', 'Fisch '),
        ('L', 'Lamm '),
        ('W', 'Wild '),
    )

    def debug_print(food_type, food_description, priceing):
        # Development helper: dump one parsed entry to stdout.
        if priceing is None:
            print(date + ': ' + food_type + ": " + food_description)
        else:
            print(date + ': ' + food_type + ": " + food_description + " : ",
                  end='')
            for entry in priceing:
                print(entry, end=' ')
            print()

    def is_new_entry(cells):
        # A new day starts with a cell whose text contains a date.
        first = cells[0]
        return first.string is not None and \
            date_regex.search(first.string) is not None

    def is_end_of_entry(cells):
        # A row of only-empty cells terminates the current day block.
        return all(cell.string is not None and cell.string.strip() == ''
                   for cell in cells)

    def is_action_entry(cell):
        return cell.text == 'Aktion'

    def is_closed(cells):
        return is_new_entry(cells) and get_pricing(cells, 4, 7) is None

    def refactor_date(raw_date):
        # Re-attach a year to a day/month string, handling the year
        # boundary in both directions.
        now = datetime.datetime.now()
        hit = date_regex.search(raw_date)
        day, month = hit.group('day'), hit.group('month')
        year = now.year
        if month == '01' and now.month == 12:
            year += 1  # plan reaches into the next year
        elif month == '12' and now.month == 1:
            year -= 1  # plan reaches back into the last year
        return day + '.' + month + '.' + str(year)

    def parse_foot_type(cell):
        label = ''
        if cell.string is None:
            images = cell.find_all('img')
            if len(images) == 0:
                return None
            src = images[0].get('src')
            if 'msc' in src:
                label += 'Fish MSC '
            elif 'vegan' in src:
                label += 'Vegan '
        elif cell.string.strip() == '':
            # Uncategorized food happens; label it so the feed never
            # contains an empty tag.
            label += 'Tipp '
        else:
            for letter, text in _TYPE_LETTERS:
                if letter in cell.string:
                    label += text
        # Drop the trailing space.
        return label[:-1]

    def get_refs(cell):
        return cell.find_all('sup')

    def get_foot_description(cell):
        # Cell text with all footnote markers removed.
        description = cell.text
        for ref in get_refs(cell):
            description = description.replace(' ' + ref.text, '', 1)
        if description[0] == ' ':
            description = description.replace(' ', '', 1)
        return description

    def get_notes(cell):
        # De-duplicated footnote markers of a cell.
        markers = []
        for ref in get_refs(cell):
            markers.extend(ref.string.split(','))
        return list(set(markers))

    def build_notes_string(cell):
        food_is = ''
        food_contains = ''
        for marker in get_notes(cell):
            if marker in _IS_NOTES:
                food_is += _IS_NOTES[marker]
            elif marker in _CONTAINS_NOTES:
                food_contains += _CONTAINS_NOTES[marker]
            else:
                food_contains += 'undefinierte Chemikalien:' + marker + ', '
        notes = ''
        if food_is != '':
            notes += 'Gericht ist ' + food_is
        if food_contains != '':
            notes += 'Gericht enthält ' if food_is == '' else 'und enthält '
            notes += food_contains
        if notes != '':
            # Replace the trailing ", " with a period.
            notes = notes[:-2] + '.'
        return notes

    def get_pricing(cells, start, stop):
        # Fewer than 7 cells usually marks a special (price-less) day.
        if len(cells) < 7:
            return None
        collected = []
        for index in range(start, stop):
            raw = cells[index].string.strip()
            if raw == '':
                return None
            collected.append(price_regex.search(raw).group('val'))
        return collected

    # state helper
    inside_valid_entry = False
    date = ''
    for row in table.find_all('tr'):
        cells = row.find_all('td')
        if is_new_entry(cells):
            try:
                date = refactor_date(cells[0].string)
                if is_closed(cells):
                    # A day that looks closed may actually be Spargeltage.
                    if "Spargeltage" in cells[3].text:
                        canteen.addMeal(date, "Spargel", "Spargel Tag",
                                        ["Spargel und andere Gerichte."],
                                        None, None)
                    else:
                        canteen.setDayClosed(date)
                else:
                    inside_valid_entry = True
            except Exception:
                traceback.print_exception(*sys.exc_info())
        if is_end_of_entry(cells):
            inside_valid_entry = False
        elif inside_valid_entry:
            try:
                notes = []
                if is_action_entry(cells[0]):
                    food_type = parse_foot_type(cells[1])
                    food_description = get_foot_description(cells[2])
                    notes_string = build_notes_string(cells[2])
                    if notes_string != "":
                        notes.append(notes_string)
                    prices = get_pricing(cells, 3, 6)
                    canteen.addMeal(date, 'Aktion: ' + food_type,
                                    food_description, notes, prices,
                                    roles if prices else None)
                else:
                    food_type = parse_foot_type(cells[2])
                    food_description = get_foot_description(cells[3])
                    notes_string = build_notes_string(cells[3])
                    if notes_string != "":
                        notes.append(notes_string)
                    prices = get_pricing(cells, 4, 7)
                    if food_type is not None:
                        canteen.addMeal(date, food_type, food_description,
                                        notes, prices,
                                        roles if prices else None)
            except Exception:
                traceback.print_exception(*sys.exc_info())
    return canteen.toXMLFeed()
def parse_url(url, today=False):
    """Parse the weekly plan at *url* into an OpenMensa feed.

    NOTE(review): the *today* parameter is immediately shadowed by the
    current date below — this mirrors the original behaviour.
    """
    today = datetime.date.today()
    if today.weekday() == 6:  # Sunday
        today += datetime.timedelta(days=1)  # Tomorrow
    if "%s" in url:
        url = url % today.strftime('%Y_%m_%d')

    # Fall back to an unverified TLS connection on connection errors.
    try:
        content = requests.get(url).text
    except requests.exceptions.ConnectionError as e:
        logging.warning(str(e))
        content = requests.get(url, verify=False).text

    document = BeautifulSoup(content, "html.parser")
    canteen = LazyBuilder()

    # Prices for employees and guests
    try:
        main_text = document.find("main").text
        p = price_employee_regex.search(main_text).groupdict()
        employee = float(p["employee"].split(",")[0]) + \
            float(p["employee"].split(",")[1]) / 100
        # NOTE(review): the guest price is also read through the
        # "employee" group name — presumably both regexes share that
        # group name; confirm against the pattern definitions.
        p = price_guest_regex.search(main_text).groupdict()
        guest = float(p["employee"].split(",")[0]) + \
            float(p["employee"].split(",")[1]) / 100
    except (AttributeError, TypeError, KeyError, ValueError):
        employee_multiplier = 1.25
        guest_multiplier = 1.60
        employee = None
        guest = None

    # Date span of the plan, taken from the page heading.
    maincontent = document.find("div", {"class": "maincontent"})
    p = datespan_regex.search(maincontent.find("h2").text).groupdict()
    if len(p["from"].split(".")[2]) == 0:
        # The "from" date lacks a year: borrow it from the "to" date.
        p["from"] += p["to"].split(".")[2]
    fromdate = datetime.datetime.strptime(p["from"], "%d.%m.%Y")

    table = maincontent.find("table")
    if not table:
        if maincontent:
            # Die Speisenausgabe DHBW Eppelheim ist vom dd.mm.yyyy – dd.mm.yyyy
            # geschlossen
            span = datespan_regex.search(maincontent.text)
            if span:
                fromdate = datetime.datetime.strptime(span["from"],
                                                      "%d.%m.%Y")
                todate = datetime.datetime.strptime(span["to"], "%d.%m.%Y")
                while fromdate <= todate:
                    canteen.setDayClosed(fromdate.strftime('%d.%m.%Y'))
                    fromdate += datetime.timedelta(1)
        return canteen.toXMLFeed()

    date = None
    for row in table.find_all("tr"):
        cells = row.find_all("td")
        if len(cells) == 4:
            # Row starts a new weekday.
            day_cell, td1, td2, td3 = cells
            weekday = day_cell.text.strip()
            date = (fromdate
                    + datetime.timedelta(days=daysGerman.index(weekday)))
            date = date.strftime('%d.%m.%Y')
        else:
            td1, td2, td3 = cells

        notes = []
        lowered = td1.text.lower()
        if "feiertag" in lowered or "geschlossen" in lowered:
            canteen.setDayClosed(date)
            continue

        categoryName = td1.text.strip()[:-1]
        mealName = td2.text.strip()
        if not categoryName or not mealName:
            continue

        prices = []
        try:
            price = float(euro_regex.search(td3.text).group(1)
                          .replace(",", "."))
            prices.append(price)
            prices.append(employee if employee is not None
                          else price * employee_multiplier)
            prices.append(guest if guest is not None
                          else price * guest_multiplier)
        except (AttributeError, TypeError, KeyError, ValueError):
            # No parsable price: keep the raw text as a note.
            notes.append(td3.text.strip())

        notes = [x for x in notes if x]
        canteen.addMeal(date, categoryName, mealName,
                        notes if notes else None,
                        prices if prices else None,
                        roles if prices else None)
    return canteen.toXMLFeed()
def parse_url(url, today=False):
    """Parse the previewTable week plan at *url* into an OpenMensa feed.

    The table alternates meal rows and price rows; the first row carries
    the category headers.  Employee/guest prices are derived from the
    student price via surcharges scraped from the page (with fallback
    multipliers).
    """
    today = datetime.date.today()
    if today.weekday() == 6:  # Sunday
        today += datetime.timedelta(days=1)  # Tomorrow
    url = url % today.strftime('%Y_%m_%d')
    if not url.startswith("http://") and not url.startswith("https://"):
        raise RuntimeError("url is not an allowed URL: '%s'" % url)
    try:
        content = requests.get(url).text
    except requests.exceptions.ConnectionError as e:
        logging.warning(e)
        content = requests.get(url, verify=False).text
    document = BeautifulSoup(content, "html.parser")
    canteen = LazyBuilder()

    # Prices for employees and guests
    try:
        p = price_regex.search(document.find(
            "p", {"id": "message"}).text).groupdict()
        employee_multiplier = 1.0 + int(p["employee"]) / 100.0
        guest_multiplier = 1.0 + int(p["guest"]) / 100.0
    except (AttributeError, TypeError, KeyError, ValueError):
        employee_multiplier = 1.25
        guest_multiplier = 1.60

    trs = document.find("table", {"id": "previewTable"}).find_all("tr")
    canteenCategories = []
    firstTr = True
    previous = None  # previous tr row
    for tr in trs:
        closed = False
        mealsFound = False
        if firstTr:
            # First table row contains the names of the different categories
            firstTr = False
            for th in tr.find_all("th")[1:]:
                canteenCategories.append(th.text.strip())
        elif previous is None:
            # Normal table row containing meal information
            previous = tr
        else:
            # Price table row
            date = day_regex.search(previous.find("td", {"class": "first"})[
                "data-date"]).group('date')
            if "geschlossen" == previous.find_all("td")[1].text.strip():
                closed = date
            cat = 0
            for td0, td1 in zip(previous.find_all("td")[1:],
                                tr.find_all("td")):
                if "heute kein Angebot" in td0.text \
                        or "geschlossen" in td0.text:
                    cat += 1
                    continue
                notes = []
                # Category
                if td0.find("h2"):
                    categoryName = canteenCategories[cat] + " " + \
                        correctCapitalization(td0.find("h2").text.strip())
                else:
                    categoryName = canteenCategories[cat]
                if "Kubusangebote am Themenpark" in td0.text:
                    canteen.addMeal(date, categoryName,
                                    "Kubusangebote am Themenpark", [])
                    cat += 1
                    continue
                # Name
                if td0.find("p"):
                    name = removeextras_regex.sub("", td0.find("p").text)
                else:
                    name = categoryName  # No name available, let's just use the category name
                # Prices
                prices = []
                spans = td1.find_all("span", {"class": "label"})
                if spans:
                    try:
                        price = float(euro_regex.search(
                            spans[0].text).group(1).replace(",", "."))
                    except (AttributeError, TypeError, KeyError, ValueError):
                        # No parsable price: record the raw text as a note.
                        # (Previously the prices tuple was still built here,
                        # using an unbound -- or stale, from an earlier
                        # meal -- `price` value.)
                        notes.append(spans[0].text.strip() + " Preis")
                    else:
                        prices = (price, price * employee_multiplier,
                                  price * guest_multiplier)
                    if len(spans) == 2:
                        notes.append(spans[1].text.strip() + " Preis")
                # Notes: vegan, vegetarisch, ...
                notes += [icon["title"]
                          for icon in td1.find_all("span", {"class": "icon"})]
                canteen.addMeal(date, categoryName, name, notes, prices,
                                roles if prices else None)
                mealsFound = True
                cat += 1
            previous = None
        if not mealsFound and closed:
            canteen.setDayClosed(closed)
    return canteen.toXMLFeed()
def parsePlan(url, internalMensaId, today):
    """Parse day plans for one canteen, following "next day" links.

    Stops after the first page when *today* is truthy, or when no
    further page link exists.  Returns the OpenMensa XML feed.
    """
    canteen = LazyBuilder()
    while url is not None:
        dom = BeautifulSoup(urlopen(url).read(), 'lxml')
        date = dom.select('#mensa_date > p')[0].contents[0]
        menuDefinition = dom.find(id=internalMensaId)
        menuDescription = menuDefinition.parent.find('dd')
        tables = menuDescription.select('table')
        legend = {}
        # Raw string: the pattern contains \w and \s, which are invalid
        # escape sequences in a plain string literal.
        legend = buildLegend(
            legend, str(dom),
            regex=r'<strong>(?P<name>\w+)\s*</strong>\s*-\s*(?P<value>[\w\s)(]+)'
        )
        if tables is not None and len(tables) == 1:
            table = tables[0]
            rows = table.find_all('tr')
            for row in rows:
                menuNameElement = row.select('td[class="mensa_col_55"] > b')
                if menuNameElement is not None and menuNameElement[
                        0].contents is not None:
                    menuName = menuNameElement[0].contents[0]
                    category = 'Gericht'
                    # get notes
                    notes = {}
                    notesElement = row.select(
                        'td[class="mensa_col_55"] > span')
                    if notesElement is not None and len(
                            notesElement) > 0 and notesElement[0].text is not None:
                        notes = [
                            legend.get(n, n)
                            for n in notesElement[0].text.split(' ') if n
                        ]
                    # get prices
                    prices = {}
                    for td in row.select('td[class="mensa_col_15"]'):
                        priceElement = td.find('b')
                        groupElement = td.find('span')
                        if priceElement is not None and groupElement is not None \
                                and groupElement.contents is not None and len(
                                    groupElement.contents
                                ) > 0 and priceElement.contents is not None and len(
                                    priceElement.contents) > 0:
                            group = str(groupElement.contents[0])
                            price = str(priceElement.contents[0])
                            if group == 'Stud.:':
                                prices['student'] = price
                            elif group == 'Bed.:':
                                prices['employee'] = price
                            elif group == 'Gast:':
                                prices['other'] = price
                    canteen.addMeal(date, category, menuName, notes, prices)
        else:
            canteen.setDayClosed(date)
        # check for further pages
        nextPageLink = dom.find(id='next_day_link')
        if nextPageLink is None or today:
            url = None
        else:
            url = 'https://www.studentenwerk-rostock.de/' + nextPageLink['href']
    return canteen.toXMLFeed()