def feed_today(self, name):
    """Build the feed containing only today's menu of one canteen.

    :param name: key of the canteen in ``self.canteens``
    :return: the XML produced by the builder, or the string
        ``'Wrong mensa name'`` if *name* is not a known canteen
    """
    if name not in self.canteens:
        return 'Wrong mensa name'

    currentDay = nowBerlin().date()
    builder = StyledLazyBuilder()
    # The result of the parse is ignored here: the feed is rendered either way
    _parseMealsUrl(builder, self.canteens[name]["id"], currentDay)
    return builder.toXMLFeed()
def feed_all(self, name):
    """Build the feed for today plus the following weekdays of one canteen.

    Today is parsed synchronously. Only if that call reports success are
    further days requested, each in its own thread: the remaining weekdays
    of the current week, then weekdays of the next week until five days
    have been requested in total.

    :param name: key of the canteen in ``self.canteens``
    :return: the XML produced by the builder, or the string
        ``'Wrong mensa name'`` if *name* is not a known canteen
    """
    startTime = time.time()
    if name not in self.canteens:
        return 'Wrong mensa name'

    mensaId = self.canteens[name]["id"]
    builder = StyledLazyBuilder()
    cursor = nowBerlin()
    oneDay = datetime.timedelta(days=1)
    workers = []

    def fetchInBackground(day):
        # Start one parser thread for `day` and remember it for the join below
        worker = Thread(target=_parseMealsUrl, args=(builder, mensaId, day))
        worker.start()
        workers.append(worker)

    # Get today
    requestedDays = 1
    if _parseMealsUrl(builder, mensaId, cursor.date()):
        cursor += oneDay

        # Get this week
        while cursor.weekday() < 5:
            fetchInBackground(cursor.date())
            cursor += oneDay
            requestedDays += 1

        # Skip over weekend (cursor is on Saturday or Sunday here)
        cursor += datetime.timedelta(days=7 - cursor.weekday())

        # Get next week
        while cursor.weekday() < 5 and requestedDays < 5:
            fetchInBackground(cursor.date())
            cursor += oneDay
            requestedDays += 1

        for worker in workers:
            worker.join()

    elapsed = time.time() - startTime
    logging.debug(f"feed_all({name}) took {elapsed:.2f} seconds")
    return builder.toXMLFeed()
def parseHorizontalDates(document, lazyBuilder, legend):
    """Parse menu tables whose first header row lists the dates.

    For every ``table.std thead`` in *document* the first row is read as
    the list of dates (matched with ``datePattern``); every following row
    is one category whose ``td.zelle_inhalt`` cells hold the meal for the
    date in the same column. Meals are added to *lazyBuilder*.

    :param document: parsed page offering ``select``
    :param lazyBuilder: feed builder receiving ``setLegendData``/``addMeal``
    :param legend: mapping from additive code to its description
    :return: None
    """
    # Useless, because the legends are usually incomplete
    lazyBuilder.setLegendData(legend)

    menuHeads = document.select('table.std thead')
    if not menuHeads:
        logging.warning("No tables found")
        return

    for thead in menuHeads:
        dates = []
        today = nowBerlin()
        headerTexts = [cell.text.strip() for cell in thead.tr.select('td')]
        for headerText in headerTexts:
            found = datePattern.search(headerText)
            if not found:
                continue
            dateText = found[0]
            parts = dateText.split('.')
            if parts[-1] == '':
                # The date ends with a dot, i.e. the year is missing: a month
                # before the current one is taken to be in the next year
                nextYear = int(parts[-2]) < today.month
                dateText += str(today.year + 1 if nextYear else today.year)
            dates.append(dateText)

        rows = (child for child in thead.children
                if isinstance(child, bs4.element.Tag))
        next(rows, None)  # First row are the dates

        for tr in rows:
            category = tr.td.text.strip()
            for column, td in enumerate(tr.select('td.zelle_inhalt')):
                # Looked up before the anchor check, like the column counter
                # that advances for empty cells as well
                date = dates[column]
                notes = []
                if not td.a:
                    continue
                if "gruen" in td.a["class"]:
                    notes.append("fleischlos")

                additives = []
                for img in td.select('a')[0].select('.additive img[alt]'):
                    altText = img.attrs["alt"].strip()
                    if altText:
                        additives.append(altText)
                for span in td.select('div[style*="font-size:10px"] span'):
                    code = span.text.strip()
                    if code not in additives:
                        additives.append(code)
                    # Emptied so the code does not show up in the meal name
                    span.clear()
                notes.extend(legend[code] if code in legend else code
                             for code in additives)

                mealName = " ".join(
                    piece.strip(" ,").strip()
                    for piece in td.select('a')[0].strings)

                price = sum(
                    float(priceMatch[0].replace(',', '.'))
                    for priceMatch in pricePattern.findall(mealName))
                prices = []
                roles = []
                if price > 0:
                    prices.append(price)
                    roles.append('student')

                if not mealName:
                    continue

                # Names are split into chunks of at most 250 characters;
                # notes and prices go on the first chunk only
                for j, productName in enumerate(
                        textwrap.wrap(mealName, width=250)):
                    isFirst = j == 0
                    lazyBuilder.addMeal(date, category, productName,
                                        notes if isFirst else None,
                                        prices if isFirst else None,
                                        roles if isFirst else None)
def feed(self, refName):
    """Download the weekly menu page of one canteen and build its feed.

    The canteen entry in ``self.canteens`` supplies ``source`` (URL path,
    optionally with a ``{timestamp}`` placeholder), ``domain`` and the
    optional keys ``pasto`` (only tables with this heading are used) and
    ``change_language``.

    :param refName: key of the canteen in ``self.canteens``
    :return: the XML produced by the builder, or an error string if
        *refName* is not a known canteen
    """
    if refName not in self.canteens:
        return f"Unkown canteen '{refName}'"

    canteen = self.canteens[refName]
    path = canteen["source"]
    domain = canteen["domain"]
    pasto = canteen.get("pasto", None)

    today = nowBerlin()

    if "{timestamp}" in path:
        # On Sundays the page is requested for the following day
        if today.weekday() == 6:
            ts = today + datetime.timedelta(days=1)
        else:
            ts = today
        path = path.format(timestamp=int(ts.timestamp()))

    if "change_language" in canteen:
        lang = canteen["change_language"]
        html = requests.get(f"https://{domain}/change_language/{lang}",
                            headers={
                                "Referer": f"https://{domain}{path}"
                            }).text
    else:
        html = requests.get(f"https://{domain}{path}").text

    lazyBuilder = StyledLazyBuilder()
    document = BeautifulSoup(html, "html.parser")

    # Log name
    logging.debug(f"\tReference: {refName}")
    for selected in document.select(
            '#selector_bar_container select option[selected]'):
        if selected.text:
            logging.debug(f"\tSelected: {selected.text}")
        else:
            logging.debug(f"\tSelected: {selected}")

    def inMonth(reference, dayNumber, monthOffset):
        """Return *reference* moved by *monthOffset* months with its day set
        to *dayNumber*; the year is carried over. Raises ValueError if that
        day does not exist in the target month."""
        year, monthIndex = divmod(
            reference.year * 12 + reference.month - 1 + monthOffset, 12)
        return reference.replace(year=year, month=monthIndex + 1,
                                 day=dayNumber)

    # Dates: the page only shows day-of-month numbers, the month and year
    # are reconstructed relative to today.
    # NOTE(review): `today` is the reference even when the page was requested
    # for `ts` (Sunday case above) - confirm the day strip shown in that case.
    dates = []
    for day in document.select(".days_container .day"):
        try:
            i = int(day.text)
        except ValueError:
            continue
        if i > today.day:
            # A number after today's is first assumed to be in the previous
            # month; if that day does not exist there it stays in the
            # current month
            try:
                date = inMonth(today, i, -1)
            except ValueError:
                date = today.replace(day=i)
        else:
            date = today.replace(day=i)
        if dates and date < dates[-1]:
            # The strip is in ascending order: going backwards means the
            # guess was one month too early
            date = inMonth(date, i, 1)
        dates.append(date)

    # Meals
    settimana = document.find("div", {"id": "settimana"})
    if settimana:
        for table in settimana.select("table.tabella_menu_settimanale"):
            if table.find("h5"):
                heading = table.find("h5").text.strip().lower()
                if heading:
                    if pasto and heading != pasto.lower():
                        logging.debug(
                            f"\tSkipping pasto: {heading} (!= {pasto.lower()})"
                        )
                        continue
                    else:
                        logging.debug(f"\tUsing pasto: {heading}")
            for tr in table.select("tr"):
                category = tr.find("th").text.strip()
                for td in tr.select("td"):
                    # data-giorno is 1-based, `dates` is 0-based
                    day_index = int(td.attrs["data-giorno"]) - 1
                    for p in td.select("p.piatto_inline"):
                        name = p.text.replace(" *", "").replace(
                            "* ", "").replace("*", "").strip()
                        for mealText in textwrap.wrap(name, width=250):
                            lazyBuilder.addMeal(dates[day_index].date(),
                                                category, mealText)

    return lazyBuilder.toXMLFeed()
def _parseMealsUrl(lazyBuilder, mensaId, day=None):
    """Download the menu page of one day and add the meals of one mensa.

    All calls on *lazyBuilder* are made while holding ``lazyBuilderLock``.

    :param lazyBuilder: feed builder receiving ``addMeal``/``setDayClosed``
    :param mensaId: id compared (as string) with the ``data-location``
        attribute of the location wrappers on the page
    :param day: date to load; defaults to today (``nowBerlin()``)
    :return: True if at least one meal was added, otherwise False. The day
        is marked closed when the mensa is listed on the page but has no
        meals; it is left untouched when the mensa is not on the page.
    """
    if day is None:
        day = nowBerlin().date()
    date = day.strftime("%Y-%m-%d")

    content = _getMealsURL(mealsUrl.format(date=date))
    document = BeautifulSoup(content, "html.parser")

    wrappers = document.find_all("div",
                                 class_="tx-epwerkmenu-menu-location-wrapper")
    # .get(): a wrapper without data-location simply does not match
    mensaDivs = [
        wrapper for wrapper in wrappers
        if wrapper.attrs.get("data-location") == str(mensaId)
    ]
    if len(mensaDivs) != 1:
        # Check if mensa is in dropdown selector
        checkbox = document.find(id=f"building-id-{mensaId}")
        if checkbox:
            logging.debug(f"No meals found [id='{mensaId}']")
            with lazyBuilderLock:
                lazyBuilder.setDayClosed(date)
        else:
            logging.error(f"Mensa not found [id='{mensaId}']")
        return False

    mensaDiv = mensaDivs.pop()
    menuTiles = mensaDiv.find_all("div", class_="menue-tile")

    foundAny = False
    for menuTile in menuTiles:
        category = string.capwords(menuTile.attrs["data-category"])
        mealName = menuTile.find(
            class_="tx-epwerkmenu-menu-meal-title").text.strip()
        desc = menuTile.find(class_="tx-epwerkmenu-menu-meal-description")
        if desc and desc.text.strip():
            mealName = f"{mealName} {desc.text.strip()}"

        additives = menuTile.find(class_="tx-epwerkmenu-menu-meal-additives")
        # Drop the <sup> markers so only the additive texts remain
        for sup in additives.find_all('sup'):
            sup.extract()
        notes = [
            note.strip() for note in additives.text.split("\n")
            if note.strip()
        ]

        pricesDiv = menuTile.find(
            class_="tx-epwerkmenu-menu-meal-prices-values")
        roles = []
        prices = []
        # Prices are separated by '/'; their position selects the role
        for j, priceText in enumerate(pricesDiv.text.split('/')):
            try:
                price = float(priceText.strip().replace(',', '.'))
            except ValueError:
                # Not a number (e.g. empty slot): no price for this role
                continue
            prices.append(price)
            roles.append(rolesOrder[j])

        with lazyBuilderLock:
            # Long names are split into chunks of at most 250 characters.
            # Each chunk is added as its own meal; notes and prices go on
            # the first chunk only.
            for j, mealText in enumerate(textwrap.wrap(mealName, width=250)):
                lazyBuilder.addMeal(date, category, mealText,
                                    notes if j == 0 else None,
                                    prices if j == 0 else None,
                                    roles if j == 0 else None)
                foundAny = True

    if foundAny:
        return True

    with lazyBuilderLock:
        lazyBuilder.setDayClosed(date)

    return False
def getMenu(mensaId):
    """
    Create openmensa feed from mensen.at website

    :param mensaId: id passed on to ``askMensenAt``
    :return: the XML produced by the builder, or an error string if the
        response status is not 200
    """
    lazyBuilder = StyledLazyBuilder()

    today = nowBerlin().date()
    year = today.year
    month = today.month

    r = askMensenAt(mensaId=mensaId)
    if r.status_code != 200:
        status = 'Could not open mensen.at'
        if 'status' in r.headers:
            status = f"{status}: {r.headers['status']}"
        logging.error(status)
        from pprint import pprint
        pprint(r.headers)
        return status

    document = BeautifulSoup(r.text, "html.parser")

    def extractLine(line, data):
        """Move price and additive codes from *line* into *data* and append
        the remaining text to ``data['text']``."""

        def price(m):
            data['price'] = m[1].replace(',', '.')
            return ""

        def addi(m):
            # m.group(0) is e.g. "(A, G)": strip the parentheses, split codes
            data['additives'] += [
                x.strip() for x in m.group(0)[1:-1].split(',') if x.strip()
            ]
            return ""

        # Raw strings: the patterns contain regex escapes, not string escapes
        line = re.sub(r"€\s+(\d+[,\.]\d+)", price, line)
        line = re.sub(r"\(([A-Z],?\s*)+\)", addi, line)
        line = re.sub(r"\s+", " ", line).strip().replace(' ,', ',')
        data['text'].append(line)

    # Map the index of each weekday tab to its date. The page shows
    # "day.month" only; a month before the current one is taken to be in
    # the next year.
    dates = {}
    for navItem in document.select('.weekdays .nav-item[data-index]'):
        index = int(navItem.attrs['data-index'])
        date = navItem.find('span', class_="date").text.split('.')
        dates[index] = datetime.date(
            year + 1 if int(date[1]) < month else year, int(date[1]),
            int(date[0]))

    # mealDict[index][category] lists the names already added, so the same
    # name is not added twice to one category of one day
    mealDict = {}
    for menuItem in document.select(".menu-item[class*='menu-item-']"):
        # The day index is encoded in the class name "menu-item-<index>"
        index = int([
            className.split('menu-item-')[1]
            for className in menuItem.attrs['class']
            if 'menu-item-' in className
        ][0])
        category = menuItem.h2.text
        if index not in mealDict:
            mealDict[index] = {}

        paragraphs = menuItem.find_all('p')

        # imgs[i] holds the additive names of the images in paragraph i
        imgs = []
        for p in paragraphs:
            imageList = []
            imgs.append(imageList)
            for img in p.find_all('img'):
                if 'alt' in img.attrs:
                    imageList.append(img.attrs['alt'])
                else:
                    foundSrc = False
                    for src, alt in imageLegend.items():
                        if src in img.attrs['src']:
                            imageList.append(alt)
                            foundSrc = True
                            break
                    if not foundSrc:
                        logging.warning("Unkown image found: %r" % (img, ))

        lines = [p.text.strip() for p in paragraphs]

        # Sentinel that flushes whatever text is still pending
        lines.append('#end')
        imgs.append([])

        data = {'additives': [], 'text': []}
        for i, line in enumerate(lines):
            data['additives'] += imgs[i]
            addMeal = False
            if line == '#end':
                addMeal = True
            elif all(c == '*' for c in line):
                # *********
                addMeal = True
            else:
                extractLine(line, data)
                if 'price' in data:
                    addMeal = True
                if line.startswith('*') and addMeal:
                    addMeal = False

            if addMeal and data['text']:
                data['additives'] = [
                    legend[add] if add in legend else add
                    for add in data['additives'] if add
                ]
                # dict.fromkeys removes duplicates but keeps the order
                notes = list(
                    dict.fromkeys(
                        [note[0:249] for note in data['additives']]))
                for j, productName in enumerate(
                        textwrap.wrap(" ".join(data['text']).strip(),
                                      width=250)):
                    if category not in mealDict[index]:
                        mealDict[index][category] = []
                    if productName not in mealDict[index][category]:
                        mealDict[index][category].append(productName)
                        lazyBuilder.addMeal(
                            dates[index], category, productName,
                            notes if j == 0 else None,
                            (data['price'], )
                            if 'price' in data and j == 0 else None,
                            roles if 'price' in data and j == 0 else None)
                data = {'additives': [], 'text': []}

    return lazyBuilder.toXMLFeed()
def _restopolisDates(document):
    """Return the dates offered by the date selector of *document* as a list
    of ``datetime.date``, or None if none of the known selectors is found."""
    # (selector tag, selector attrs, button tag, button attrs, date attribute)
    layouts = (
        ("div", {"class": "date-selector-desktop"},
         "button", {"class": "day"}, "data-full-date"),
        ("div", {"class": "date-selector-mobile-indicator"},
         "div", {"class": "date-selector-mobile-day-bullet"}, "data-day-text"),
        ("div", {"id": "date-selector"},
         "a", {"class": "day"}, "data-date"),
    )
    for tag, attrs, buttonTag, buttonAttrs, dateAttr in layouts:
        dateSelector = document.find(tag, attrs)
        if dateSelector:
            return [
                datetime.datetime.strptime(button.attrs[dateAttr],
                                           '%d.%m.%Y').date()
                for button in dateSelector.find_all(buttonTag, buttonAttrs)
            ]
    return None


def getMenu(restaurantId,
            datetimeDay=None,
            serviceIds=None,
            alternativeId=None,
            alternativeServiceIds=None):
    """
    Create openmensa feed from restopolis website

    :param restaurantId: restaurant id passed on to ``askRestopolis``
    :param datetimeDay: date to request; defaults to today (``nowBerlin()``)
    :param serviceIds: a single service id or an iterable of service ids
        and/or ``(id, label)`` tuples. The argument itself is not modified.
    :param alternativeId: restaurant id to try instead if no meal was found
    :param alternativeServiceIds: ``serviceIds`` for *alternativeId*
    :return: tuple ``(xml, number of days, number of meals, weekdayCounter)``;
        if a response status is not 200 the first element is an error string
        and both counts are 0
    :raises RuntimeWarning: if the page contains an unexpected element
    """
    startTime = time.time()
    lazyBuilder = StyledLazyBuilder()
    comments = []

    if not datetimeDay:
        datetimeDay = nowBerlin().date()

    # Normalise into a private list of (id, label) tuples. The list is
    # extended further down, so it must not be the caller's object.
    if isinstance(serviceIds, str) or not isinstance(serviceIds, Iterable):
        serviceIds = [(serviceIds, "")]
    else:
        serviceIds = [(service, "") if isinstance(service,
                                                   (str, int)) else service
                      for service in serviceIds]

    mealCounter = 0
    dayCounter = set()
    weekdayCounter = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0}

    # With a single service the following weeks are requested as well
    repeat = len(serviceIds) == 1
    repeatCounter = 0
    mealCounterLast = mealCounter
    for service in serviceIds:
        serviceSuffix = f"({service[1]})" if service[1] and len(
            serviceIds) > 1 else ""
        r = askRestopolis(restaurant=restaurantId,
                          service=service[0],
                          date=datetimeDay)
        if r.status_code != 200:
            status = f'Could not open restopolis Error{r.status_code}'
            if 'status' in r.headers:
                status = f"{status}: {r.headers['status']}"
            logging.error(status)
            from pprint import pprint
            pprint(r.headers)
            return status, 0, 0, weekdayCounter

        if '<' not in r.text:
            comments.append(
                f"Restaurant [id={restaurantId}, service={service}]: No HTML in response body: `{r.text}`"
            )
            break

        document = BeautifulSoup(r.text, "html.parser")

        # Extract available dates from date selector
        foundDates = _restopolisDates(document)
        if foundDates is None:
            logging.warning("No div.date-selector-desktop found")
            comments.append(
                f"Restaurant [id={restaurantId}, service={service}] not found"
            )
            break
        dates = foundDates

        # Extract menu for each date
        for i, oneDayDiv in enumerate(document.select(".daily-menu>div")):
            dayCounter.add(dates[i])
            date = dates[i]
            weekDay = date.weekday()
            courseName = ""
            categoryNotes = []
            notes = []
            productSection = ""
            productName = ""
            productAllergens = []
            productDescription = ""
            isClosed = False
            # Sentinel element, so the last collected product is flushed too
            oneDayDiv.append(
                document.new_tag("div", attrs={"class": "fake-last"}))
            # A list, because category containers append their children below
            children = list(oneDayDiv.children)
            for div in children:
                if not isinstance(div, bs4.element.Tag):
                    # Skip text node children
                    continue
                if not isClosed and courseName and productName and "class" in div.attrs and (
                        "fake-last" in div.attrs["class"]
                        or "product-name" in div.attrs["class"]
                        or "course-name" in div.attrs["class"]
                        or "product-section" in div.attrs["class"]):
                    # Add meal: a new product/course/section (or the
                    # sentinel) ends the product collected so far
                    mealCounter += 1
                    weekdayCounter[weekDay] += 1
                    category = courseName
                    if productSection:
                        category += " " + productSection
                    if serviceSuffix:
                        category += " " + serviceSuffix
                    if productDescription:
                        notes += textwrap.wrap(productDescription, width=250)
                    if productAllergens:
                        notes += productAllergens
                    if categoryNotes:
                        notes += categoryNotes
                    lazyBuilder.addMeal(date, category, productName[0:249],
                                        [note[0:249] for note in notes])
                    productName = ""
                    productAllergens = []
                    productDescription = ""
                    notes = []

                # walk through all div and collect info
                if "class" in div.attrs:
                    classes = div.attrs["class"]
                    # NOTE(review): find() is given a CSS selector here, but
                    # BeautifulSoup's find() matches tag names - this looks
                    # like it can never match (select_one?). Left unchanged;
                    # confirm before changing.
                    if "fake-last" in classes:
                        pass
                    elif "no-products" in classes or div.find(
                            ".formulaeContainer.no-products"):
                        # Closed (No meals)
                        lazyBuilder.setDayClosed(date)
                        isClosed = True
                    elif "fermé" in div.text.lower() or "fermé" in str(
                            div.attrs).lower():
                        # Closed (explicit)
                        lazyBuilder.setDayClosed(date)
                        isClosed = True
                    elif "course-name" in classes:
                        courseName = div.text.strip()
                        productSection = ""
                    elif "product-section" in classes:
                        productSection = div.text.strip()
                    elif "product-allergens" in classes:
                        productAllergensGen = (a.strip()
                                               for a in div.text.split(",")
                                               if a.strip())
                        # Numbers are replaced via allergen()
                        productAllergens += [
                            re.sub(r"\d+", lambda m: allergen(m[0]), a)
                            for a in productAllergensGen
                        ]
                    elif "product-description" in classes:
                        productDescription = div.text.strip()
                    elif "product-name" in classes:
                        productName = div.text.strip()
                        productName = productName.replace("''", '"')
                        productName = productName.replace("1/2 ", '½ ')
                    elif "product-flag" in classes:
                        unknownImg = True
                        for img in imgs:
                            if div.attrs["src"].endswith(img):
                                notes.append(imgs[img])
                                unknownImg = False
                        if unknownImg:
                            logging.warning(
                                f"Unkown img {div.attrs['src']} [restaurant={restaurantId}]"
                            )
                            comments.append(
                                f"Unkown img {div.attrs['src']} [restaurant={restaurantId}]"
                            )
                    elif "wrapper-theme-day" in classes:
                        logging.info(
                            f"Theme day: {div.text.strip()} [restaurant={restaurantId}]"
                        )
                        comments.append(
                            f"Theme day: {div.text.strip()} [restaurant={restaurantId}]"
                        )
                    elif "wrapper-category" in classes:
                        for categoryButton in div.find_all('button'):
                            if "showConstantProducts" not in categoryButton.attrs[
                                    'class'] and "showFormulae" not in categoryButton.attrs[
                                        'class']:
                                logging.info(
                                    f"Unknown category button: {categoryButton.attrs['class']}: {categoryButton.text.strip()}"
                                )
                                comments.append(
                                    f"Unknown category button: {categoryButton.attrs['class']}: {categoryButton.text.strip()}"
                                )
                    elif "cb" in classes:
                        pass
                    elif "formulaeContainer" in classes or "constantProductContainer" in classes:
                        # Append content of category container; the sentinel
                        # has to stay the last element
                        last = children.pop()
                        children.extend(div.children)
                        children.append(last)
                        if "constantProductContainer" in classes:
                            categoryNotes = ["produit constant"]
                        else:
                            categoryNotes = []
                    elif "action-buttons" in classes:
                        pass
                    else:
                        logging.debug(div)
                        raise RuntimeWarning(
                            f"unknown tag <{div.name}> with class {div.attrs['class']}: oneDayDiv->else [restaurant={restaurantId}]"
                        )
                elif div.name == 'ul':
                    # A plain list: every non-empty item is one meal
                    mealCounter += 1
                    weekdayCounter[weekDay] += 1
                    for li in div.select('li'):
                        if not li.text or not li.text.strip():
                            continue
                        # Add meal
                        category = courseName
                        if productSection:
                            category += " " + productSection
                        lazyBuilder.addMeal(date, category,
                                            li.text.strip()[0:249])
                        productName = ""
                        productAllergens = []
                        productDescription = ""
                else:
                    logging.debug(div)
                    raise RuntimeWarning(
                        f"unknown tag <{div.name}>: oneDayDiv->else")

        # Request the following week with the same service, as long as the
        # server answers quickly and the last request brought new meals (or
        # today is a weekend day).
        # NOTE(review): time.time() is in seconds, so this budget is 7000 s;
        # confirm whether milliseconds were intended.
        if hasattr(r, 'duration'
                   ) and r.duration < 2000 and time.time() - startTime < 7000:
            if repeat and repeatCounter < 3 and (
                    mealCounter > 0 and mealCounter > mealCounterLast
                    or nowBerlin().weekday() in (5, 6)):
                repeatCounter += 1
                mealCounterLast = mealCounter
                # Extending the list that is being iterated makes the loop
                # run once more
                serviceIds.append(service)
                datetimeDay += datetime.timedelta(days=7)

    if mealCounter == 0 and alternativeId:
        logging.debug("No meals -> trying alternativeId")
        return getMenu(alternativeId,
                       datetimeDay=datetimeDay,
                       serviceIds=alternativeServiceIds,
                       alternativeId=None,
                       alternativeServiceIds=None)

    xml = lazyBuilder.toXMLFeed()
    # "--" is not allowed inside an XML comment
    for commentStr in comments:
        xml += f"\n<!-- {commentStr.replace('--', '- -')} -->\n"

    print(f" {len(dayCounter):3d} 📅 {mealCounter:4d} 🍽️ ", end="")
    return xml, len(dayCounter), mealCounter, weekdayCounter