def parse_url(url, today=False, canteentype="Mittagsmensa", this_week="", next_week=True, legend_url=None):
    canteen = LazyBuilder()
    canteen.legendKeyFunc = lambda v: v.lower()
    if not legend_url:
        legend_url = url[: url.find("essen/") + 6] + "wissenswertes/lebensmittelkennzeichnung"
    legend_doc = parse(urlopen(legend_url)).find(id="artikel")
    allergene = buildLegend(
        text=legend_doc.text.replace("\xa0", " "), regex=r"(?P<name>[A-Z]+) {3,}enthält (?P<value>\w+( |\t|\w)*)"
    )
    allergene["EI"] = "Ei"
    zusatzstoffe = buildLegend(
        text=legend_doc.text.replace("\xa0", " "), regex=r"(?P<name>\d+) {3,} (enthält )?(?P<value>\w+( |\t|\w)*)"
    )
    for tr in legend_doc.find_all("tr"):
        tds = tr.find_all("td")
        if len(tds) != 2:
            continue
        title = tds[0].find("strong")
        if title is None:
            continue
        else:
            title = title.text
        text = tds[1].text.replace("enthält", "").strip()
        if title.isdigit():
            zusatzstoffe[title] = text
        else:
            allergene[title] = text
    parse_week(url + this_week, canteen, canteentype, allergene=allergene, zusatzstoffe=zusatzstoffe)
    if not today and next_week is True:
        parse_week(url + "-kommende-woche", canteen, canteentype, allergene=allergene, zusatzstoffe=zusatzstoffe)
    if not today and isinstance(next_week, str):
        parse_week(url + next_week, canteen, canteentype, allergene=allergene, zusatzstoffe=zusatzstoffe)
    return canteen.toXMLFeed()
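Every example on this page follows the same pyopenmensa skeleton: build a LazyBuilder, feed it meals or closed days, then serialize with toXMLFeed(). A minimal sketch of that shared pattern (all canteen data below is invented for illustration):

from pyopenmensa.feed import LazyBuilder

canteen = LazyBuilder()
# Dates may be ISO strings; prices map the roles student/employee/other.
canteen.addMeal('2024-06-03', 'Hauptgerichte', 'Beispielgericht',
                notes=['vegetarisch'], prices={'student': '2,50 €'})
canteen.setDayClosed('2024-06-04')
print(canteen.toXMLFeed())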
Example #2
    def metadata(self, request):
        meta = LazyBuilder(version=self.parser.version)

        meta.feeds.append(Feed(
            name='today',
            hour='8-14',
            url='/'.join([request.host, self.parser.name, self.name, 'today.xml']),
            priority=0,
            source=None,
            dayOfMonth='*',
            dayOfWeek='*',
            minute='0',
            retry=None
        ))

        meta.feeds.append(Feed(
            name='full',
            hour='8',
            url='/'.join([request.host, self.parser.name, self.name, 'full.xml']),
            priority=0,
            source=None,
            dayOfMonth='*',
            dayOfWeek='*',
            minute='0',
            retry=None
        ))

        return meta.toXMLFeed()
Example #3
def parse_url(url, today=False):
    canteen = LazyBuilder()
    parse_week(url + '.html', canteen)
    if not today:
        parse_week(url + '-w1.html', canteen)
        parse_week(url + '-w2.html', canteen)
    return canteen.toXMLFeed()
Example #4
    def metadata(self, request):
        meta = LazyBuilder(version=self.parser.version)

        meta.feeds.append(Feed(
            name='today',
            hour='8-14',
            url='/'.join([request.host, self.parser.name, self.name, 'today.xml']),
            priority=0,
            source=None,
            dayOfMonth='*',
            dayOfWeek='*',
            minute='0',
            retry=None
        ))

        meta.feeds.append(Feed(
            name='full',
            hour='8',
            url='/'.join([request.host, self.parser.name, self.name, 'full.xml']),
            priority=0,
            source=None,
            dayOfMonth='*',
            dayOfWeek='*',
            minute='0',
            retry=None
        ))

        return meta.toXMLFeed()
Example #5
def parse_url(url, today=False):
    canteen = LazyBuilder()

    content = urlopen(url).read()
    document = parse(content, 'lxml')

    available_weeks = parse_available_weeks(document)

    # for the case that the start date is not auto-set by the page, e.g. on weekends
    noskip = find_start_date(document) is None

    employees_fee, guests_fee = parse_fees(document)
    groups = parse_ingredients(document)

    for idx, week in enumerate(available_weeks):
        if idx > 0 or noskip:
            content = urlopen("{}?selWeek={}".format(url, week)).read()
            document = parse(content, 'lxml')

        parse_meals_for_canteen(document, canteen, employees_fee, guests_fee,
                                groups, today)
        if today:
            break

    return canteen.toXMLFeed()
Example #6
    def feed_all(self, name):
        canteen = LazyBuilder()

        date = self.__now()

        # Get this week
        lastWeekday = -1
        while self.handler(canteen, self.xml2locId[name], date.date()):
            date += datetime.timedelta(days=1)
            if lastWeekday > date.weekday():
                break
            lastWeekday = date.weekday()

        # Skip over weekend
        if date.weekday() > 4:
            date += datetime.timedelta(days=7-date.weekday())

            # Get next week
            lastWeekday = -1
            while self.handler(canteen, self.xml2locId[name], date.date()):
                date += datetime.timedelta(days=1)
                if lastWeekday > date.weekday():
                    break
                lastWeekday = date.weekday()

        return canteen.toXMLFeed()
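The weekend skip in feed_all above relies on weekday() being 5 for Saturday and 6 for Sunday; a small stdlib-only sketch of the same arithmetic (date chosen arbitrarily):

import datetime

date = datetime.datetime(2024, 6, 1)  # a Saturday
if date.weekday() > 4:
    date += datetime.timedelta(days=7 - date.weekday())
assert date.weekday() == 0  # advanced to the following Monday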
Example #7
def parse_url(url, today=False):
    canteen = LazyBuilder()
    parse_week(url + '.html?view=list', canteen)
    if not today:
        parse_week(url + '-w1.html?view=list', canteen)
        parse_week(url + '-w2.html?view=list', canteen)
    return canteen.toXMLFeed()
Example #8
def parse_url(url, mensa, *weeks, today):
    canteen = LazyBuilder()
    for week in weeks:
        parse_week(url + week, canteen, mensa)
        if today:
            break
    return canteen.toXMLFeed()
Example #9
def parse_url(url, today=False):
    canteen = LazyBuilder()
    legend = {'f': 'fleischloses Gericht', 'v': 'veganes Gericht'}
    document = parse(urlopen(base + '/speiseplan/zusatzstoffe-de.html').read())
    for td in document.find_all('td', 'beschreibung'):
        legend[td.previous_sibling.previous_sibling.text] = td.text
    document = parse(urlopen(base + '/unsere-preise/').read())
    prices = {}
    for tr in document.find('table', 'essenspreise').find_all('tr'):
        meal = tr.find('th')
        if not meal or not meal.text.strip():
            continue
        if len(tr.find_all('td', 'betrag')) < 3:
            continue
        if 'titel' in meal.attrs.get('class', []) or 'zeilentitel' in meal.attrs.get('class', []):
            continue
        meal = meal.text.strip()
        prices[meal] = {}
        for role, _id in [('student', 0), ('employee', 1), ('other', 2)]:
            price_html = tr.find_all('td', 'betrag')[_id].text
            price_search = price_regex.search(price_html)
            if price_search:
                prices[meal][role] = price_search.group('price')
    errorCount = 0
    date = datetime.date.today()
    while errorCount < 7:
        try:
            document = parse(urlopen(url.format(date)).read())
        except HTTPError as e:
            if e.code == 404:
                errorCount += 1
                date += datetime.date.resolution
                continue
            else:
                raise e
        else:
            errorCount = 0
        for tr in document.find('table', 'zusatzstoffe').find_all('tr'):
            identifier = tr.find_all('td')[0].text \
                           .replace('(', '').replace(')', '')
            legend[identifier] = tr.find_all('td')[1].text.strip()
        canteen.setLegendData(legend)
        mensa_data = document.find('table', 'menu')
        category = None
        for menu_tr in mensa_data.find_all('tr'):
            if menu_tr.find('td', 'headline'):
                continue
            if menu_tr.find('td', 'gericht').text:
                category = menu_tr.find('td', 'gericht').text
            data = menu_tr.find('td', 'beschreibung')
            name = data.find('span').text.strip()
            notes = [span['title'] for span in data.find_all('span', title=True)]
            canteen.addMeal(
                date, category, name, notes,
                prices.get(category.replace('Aktionsessen', 'Bio-/Aktionsgericht'), {})
            )
        date += datetime.date.resolution
        if today:
            break
    return canteen.toXMLFeed()
Example #10
def parse_url(url,
              today=False,
              canteentype='Mittagsmensa',
              this_week='',
              next_week=True,
              legend_url=None):
    canteen = LazyBuilder()
    canteen.legendKeyFunc = lambda v: v.lower()
    if not legend_url:
        legend_url = url[:url.find('essen/') + 6] + 'wissenswertes/lebensmittelkennzeichnung'
    legend_doc = parse(urlopen(legend_url), 'lxml').find(id='artikel')
    allergene = buildLegend(
        text=legend_doc.text.replace('\xa0', ' '),
        regex=r'(?P<name>[A-Z]+) {3,}enthält (?P<value>\w+( |\t|\w)*)')
    allergene['EI'] = 'Ei'
    zusatzstoffe = buildLegend(
        text=legend_doc.text.replace('\xa0', ' '),
        regex=r'(?P<name>\d+) {3,} (enthält )?(?P<value>\w+( |\t|\w)*)')
    suballergene = re.compile(
        r'(?P<name>[0-9A-Z]+)[^a-zA-Z]*enthält (?P<value>\w+( |\t|\w)*)')
    for tr in legend_doc.find_all('tr'):
        tds = tr.find_all('td')
        if len(tds) != 2:
            continue
        title = tds[0].find('strong')
        if title is None:
            continue
        else:
            title = title.text
        lines = tds[1].text.split('\n')
        for line in lines[1:]:
            try_allergene = suballergene.match(line)
            if try_allergene:
                allergene[try_allergene.group('name')] = try_allergene.group('value')
        text = lines[0].replace('enthält', '').strip()
        if title.isdigit():
            zusatzstoffe[title] = text
        else:
            allergene[title] = text
    parse_week(url + this_week,
               canteen,
               canteentype,
               allergene=allergene,
               zusatzstoffe=zusatzstoffe)
    if not today and next_week is True:
        parse_week(url + '-kommende-woche',
                   canteen,
                   canteentype,
                   allergene=allergene,
                   zusatzstoffe=zusatzstoffe)
    if not today and isinstance(next_week, str):
        parse_week(url + next_week,
                   canteen,
                   canteentype,
                   allergene=allergene,
                   zusatzstoffe=zusatzstoffe)
    return canteen.toXMLFeed()
Example #11
def parse_url(url, today=False):
    canteen = LazyBuilder()
    try:
        xml_data = urlopen(url).read()
    except Exception:
        return canteen.toXMLFeed()
    root = ET.fromstring(xml_data)
    for day in root:
        date = time.strftime('%d.%m.%Y', time.localtime(int(day.get('timestamp'))))
        for item in day:
            title = item.find('title').text
            description = get_description(title)
            notes = build_notes_string(title)
            plist = [item.find('preis1').text, item.find('preis2').text, item.find('preis3').text]
            food_type = get_food_types(item.find('piktogramme').text)
            canteen.addMeal(date, food_type, description, notes, plist, roles)
    return canteen.toXMLFeed()
Example #12
def parse_url(url, today=False):
    canteen = LazyBuilder()
    parse_week(url + (datetime.date.today()
               + datetime.date.resolution * 7).strftime('/%Y/%W/'), canteen)
    if not today:
        parse_week(url + (datetime.date.today()
                       + datetime.date.resolution * 14).strftime('/%Y/%W/'), canteen)
    return canteen.toXMLFeed()
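For reference, datetime.date.resolution is one day, so resolution * 7 advances exactly one week; a quick sketch of the '/%Y/%W/' path fragment this produces (date invented):

import datetime

next_week = datetime.date(2024, 6, 3) + datetime.date.resolution * 7
print(next_week.strftime('/%Y/%W/'))  # '/2024/24/' (zero-padded week number)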
Example #13
def parse_url(url, today=False):
    canteen = LazyBuilder()
    document = parse(urlopen(url).read())
    for day_div in document.find_all('div', 'day') + document.find_all(
            'article', attrs={'data-day': True}):
        # parse date; note: the year is missing from the page and must be calculated
        date_test = day_regex.search(day_div['data-day'])
        if not date_test:
            print('Error: unable to parse date')
            continue
        else:
            year = datetime.datetime.now().year
            if datetime.datetime.now().month > int(date_test.group('month')):
                year += 1  # date from next year
            date = "{}-{}-{}".format(
                year,
                date_test.group('month'),
                date_test.group('day'),
            )
        if 'nodata' in day_div.attrs.get('class',
                                         []) or 'GESCHLOSSEN' in day_div.text:
            canteen.setDayClosed(date)
            continue
        closed_candidate = False
        for meal_article in day_div.find_all('article', 'menu'):
            name = meal_article.find('div', 'title').text
            if not name:
                continue
            if 'geschlossen' in name:
                closed_candidate = True
                continue
            category = meal_article.find('div')['title']
            notes = [
                v['title'] for v in meal_article.find_all('div', 'theicon')
                if v['title']
            ]
            if meal_article.find('div', 'additive'):
                notes += [
                    v[0] for v in extra_regex.findall(
                        meal_article.find('div', 'additive').text)
                ]
            price_div = meal_article.find('div', 'price')
            if price_div is None:
                canteen.addMeal(date, category, name, notes)
                continue
            prices = {}
            for v, r in (('default', 'student'), ('bed', 'employee'),
                         ('guest', 'other')):
                price = price_regex.search(price_div['data-' + v])
                if price:
                    prices[r] = price.group('price')
                elif v == 'default':
                    prices = {}
                    break
            canteen.addMeal(date, category, name, notes, prices)
        if closed_candidate and not canteen.hasMealsFor(date):
            canteen.setDayClosed(date)
    return canteen.toXMLFeed()
Example #14
def parse_url(url, today=False):
    canteen = LazyBuilder()
    day = datetime.date.today()
    for _ in range(21):
        parse_day(canteen, '{}&date={}'.format(url, day.strftime('%Y-%m-%d')))
        if today:
            break
        day += datetime.timedelta(days=1)
    return canteen.toXMLFeed()
Example #15
    def feed(self, name):
        canteen = LazyBuilder()
        if name in self.xmlnames:
            parse_url(canteen, name)  # all categories
        else:
            xmlname_entry = [x for x in self.xmlnames if x[0] == name][0]
            parse_url(canteen, *xmlname_entry)  # only certain categories

        return canteen.toXMLFeed()
Example #16
def parse_url(url, today):
    canteen = LazyBuilder()
    canteen.setAdditionalCharges('student', {})
    if today:
        parse_week(url, canteen)  # base url only contains current day
    else:
        parse_week(url + 'week', canteen)
        parse_week(url + 'nextweek', canteen)

    return canteen.toXMLFeed()
Example #17
def parse_url(url, today):
    canteen = LazyBuilder()
    canteen.setAdditionalCharges('student', {})
    if today:
        parse_week(url, canteen)  # base url only contains current day
    else:
        parse_week(url + 'week', canteen)
        parse_week(url + 'nextweek', canteen)

    return canteen.toXMLFeed()
Example #18
def parse_url(url, today=False):
    content = urlopen(url).read()
    document = parse(content, 'lxml')
    legends = document.find_all('div', {'class': 'legende'})
    if len(legends) > 0:
        extraLegend = {
            int(v[0]): v[1]
            for v in reversed(legend_regex.findall(legends[0].text))
        }
    else:
        extraLegend = {}
    canteen = LazyBuilder()
    for day_td in document.find_all('td', text=day_regex):
        date = day_regex.search(day_td.string).group('date')
        table = None
        for element in day_td.parents:
            if element.name == 'table':
                table = element
                break
        if not table:
            continue
        for tr in table.tbody.find_all('tr'):
            if 'geschlossen' in tr.text or 'Feiertage' in tr.text:
                match = day_range_regex.search(tr.text)
                if not match:
                    canteen.setDayClosed(date)
                else:
                    fromDate = datetime.datetime.strptime(
                        match.group('from'), '%d.%m.%Y')
                    toDate = datetime.datetime.strptime(
                        match.group('to'), '%d.%m.%Y')
                    while fromDate <= toDate:
                        canteen.setDayClosed(fromDate.strftime('%Y-%m-%d'))
                        fromDate += datetime.date.resolution
                continue
            if len(tr) != 2:
                continue  # no meal
            strings = list(tr.contents[0].strings)
            name = strings[0]
            # prices:
            prices = strings[-1].split('|')
            if '-' in map(lambda v: v.strip(), prices):
                prices = {}
            # notes:
            notes = []
            for img in tr.contents[1].find_all('img'):
                notes.append(img['alt'].replace('Symbol', '').strip())
            for extra in list(
                    set(map(lambda v: int(v), extra_regex.findall(tr.text)))):
                if extra in extraLegend:
                    notes.append(extraLegend[extra])
            canteen.addMeal(date, 'Hauptgerichte', name, notes, prices,
                            roles if prices else None)
    return canteen.toXMLFeed()
Example #19
def parse_url(url, today=False):
    base_data = load_base_data()

    canteen = LazyBuilder()
    with urlopen(url) as response:
        data = json.loads(response.read().decode())

    for day in data['days']:
        date = datetime.datetime.strptime(day['date'], UTC_DATE_STRING).date()

        if today and (datetime.date.today() != date):
            continue

        for counter in day['counters']:
            counter_name = counter['displayName']
            counter_description = counter['description']
            counter_hours = counter.get('openingHours')

            for meal in counter['meals']:
                if 'knownMealId' in meal:
                    # This is meant to allow recognizing recurring meals,
                    # for features like marking meals as favorites.
                    # Up to now, it is not really used in the mensaar.de API,
                    # nor functional in this API parser.
                    # The meal will still be recognized like any other meal.
                    print('knownMealId: %s' % meal['knownMealId'],
                          file=sys.stderr)

                meal_name = meal['name']
                if 'category' in meal:
                    meal_name = '%s: %s' % (meal['category'], meal_name)

                meal_notes = (
                    # The description is typically the location
                    # (but not required to be by the API specification).
                    build_location(counter_description) +
                    build_hours(counter_hours) + build_notes(
                        base_data, meal['notices'], meal['components']))

                meal_prices = {}
                if 'prices' in meal:
                    prices = meal['prices']
                    for role in prices:
                        if role in ROLES:
                            meal_prices[base_data['roles'][role]] = prices[role]

                if 'pricingNotice' in meal:
                    meal_notes.append(meal['pricingNotice'])

                canteen.addMeal(date, counter_name, meal_name, meal_notes,
                                meal_prices)

    return canteen.toXMLFeed()
Example #20
def parse_url(url, today=False):
    base_data = load_base_data()

    canteen = LazyBuilder()
    with urlopen(url) as response:
        data = json.loads(response.read().decode())

    for day in data['days']:
        date = datetime.datetime.strptime(day['date'], UTC_DATE_STRING).date()

        if today and (datetime.date.today() != date):
            continue

        for counter in day['counters']:
            counter_name = counter['displayName']
            counter_description = counter['description']
            counter_hours = counter.get('openingHours')

            for meal in counter['meals']:
                if 'knownMealId' in meal:
                    # This is meant to allow recognizing recurring meals,
                    # for features like marking meals as favorites.
                    # Up to now, it is not really used in the mensaar.de API,
                    # nor functional in this API parser.
                    # The meal will still be recognized like any other meal.
                    print('knownMealId: %s' % meal['knownMealId'], file=sys.stderr)

                meal_name = meal['name']
                if 'category' in meal:
                    meal_name = '%s: %s' % (meal['category'], meal_name)

                meal_notes = (
                    # The description is typically the location
                    # (but not required to be by the API specification).
                    build_location(counter_description) +
                    build_hours(counter_hours) +
                    build_notes(base_data, meal['notices'], meal['components']))

                meal_prices = {}
                if 'prices' in meal:
                    prices = meal['prices']
                    for role in prices:
                        if role in ROLES:
                            meal_prices[base_data['roles'][role]] = prices[role]

                if 'pricingNotice' in meal:
                    meal_notes.append(meal['pricingNotice'])

                canteen.addMeal(date, counter_name,
                                meal_name, meal_notes, meal_prices)

    return canteen.toXMLFeed()
Example #21
def parsePlan(url, internalMensaId, today):
    canteen = LazyBuilder()
    while url is not None:
        dom = BeautifulSoup(urlopen(url).read(), 'lxml')
        date = dom.select('#mensa_date > p')[0].contents[0]
        menuDefinition = dom.find(id=internalMensaId)
        menuDescription = menuDefinition.parent.find('dd')
        tables = menuDescription.select('table')
        legend = {}
        legend = buildLegend(legend, str(dom), regex=r'<strong>(?P<name>\w+)\s*</strong>\s*-\s*(?P<value>[\w\s)(]+)')
        if tables is not None and len(tables) == 1:
            table = tables[0]
            rows = table.find_all('tr')
            for row in rows:
                menuNameElement = row.select('td[class="mensa_col_55"] > b')
                if menuNameElement and menuNameElement[0].contents:
                    menuName = menuNameElement[0].contents[0]
                    category = 'Gericht'

                    # get notes
                    notes = []
                    notesElement = row.select('td[class="mensa_col_55"] > span')
                    if notesElement and notesElement[0].text is not None:
                        notes = [legend.get(n, n) for n in notesElement[0].text.split(' ') if n]

                    # get prices
                    prices = {}
                    for td in row.select('td[class="mensa_col_15"]'):
                        priceElement = td.find('b')
                        groupElement = td.find('span')
                        if priceElement is not None and groupElement is not None and groupElement.contents and priceElement.contents:
                            group = str(groupElement.contents[0])
                            price = str(priceElement.contents[0])
                            if group == 'Stud.:':
                                prices['student'] = price
                            elif group == 'Bed.:':
                                prices['employee'] = price
                            elif group == 'Gast:':
                                prices['other'] = price

                    canteen.addMeal(date, category, menuName, notes, prices)
        else:
            canteen.setDayClosed(date)

        # check for further pages
        nextPageLink = dom.find(id='next_day_link')
        if nextPageLink is None or today:
            url = None
        else:
            url = 'https://www.studentenwerk-rostock.de/' + nextPageLink['href']
    return canteen.toXMLFeed()
Example #22
def render_menu(menu):
    """Render the menu for a canteen into an OpenMensa XML feed.

    :param dict menu: the Python representation of the API JSON response
    :return: the XML feed as string
    """
    builder = LazyBuilder()

    if menu:
        for day in _active_days(menu):
            _process_day(builder, day)

    return builder.toXMLFeed()
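A minimal usage sketch for render_menu; the endpoint URL is a placeholder, and the JSON shape is whatever the project's API actually returns:

import json
from urllib.request import urlopen

# Hypothetical endpoint, for illustration only.
with urlopen('https://example.org/api/canteens/1/menu') as response:
    menu = json.loads(response.read().decode())
xml = render_menu(menu)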
Example #23
def parse_url(url, today=False):
    canteen = LazyBuilder()
    document = parse(urlopen(url).read(), 'lxml')

    for day_div in document.find_all('div', attrs={'data-day': True}):
        # parse date; note: the year is missing from the page and must be calculated
        date_test = day_regex.search(day_div['data-day'])
        if not date_test:
            print('Error: unable to parse date "{}"'.format(
                day_div['data-day']))
            continue
        else:
            year = datetime.datetime.now().year
            if datetime.datetime.now().month > int(date_test.group('month')):
                year += 1  # date from next year
            date = '{}-{}-{}'.format(year, date_test.group('month'),
                                     date_test.group('day'))

        closed_candidate = day_div.find('div', 'holiday') is not None

        for meal_article in day_div.find_all('article', 'menu'):
            name = meal_article.find('div', 'title').text
            if not name:
                continue

            category = meal_article.find('div', 'icon')['title']
            notes = []
            prices = {}

            additives = meal_article.find('div', 'additnr')
            if additives:
                notes += [
                    additive.text for additive in additives.find_all('li')
                ]
            notes += [
                v['title'] for v in meal_article.find_all('div', 'theicon')
                if v['title'] and v['title'] not in notes
            ]

            price_div = meal_article.find('div', 'price')
            if price_div:
                for k, v in price_map.items():
                    price = price_div['data-' + k]
                    if price:
                        prices[v] = price
            canteen.addMeal(date, category, name, notes, prices)

        if closed_candidate and not canteen.hasMealsFor(date):
            canteen.setDayClosed(date)

    return canteen.toXMLFeed()
Example #24
def parse_url(url, today=False):
    canteen = LazyBuilder()
    day = datetime.date.today()
    emptyCount = 0
    while emptyCount < 7:
        if not parse_day(canteen, '{}&day={}&month={}&year={}&limit=25'
                         .format(url, day.day, day.month, day.year),
                         day.strftime('%Y-%m-%d')):
            emptyCount += 1
        else:
            emptyCount = 0
        if today:
            break
        day += datetime.date.resolution
    return canteen.toXMLFeed()
Example #25
def parse_url(url, today=False):
    content = urlopen(url).read()
    document = parse(content)
    legends = document.find_all('div', {'class': 'legende'})
    if len(legends) > 0:
        extraLegend = {int(v[0]): v[1] for v in reversed(legend_regex.findall(legends[0].text))}
    else:
        extraLegend = {}
    canteen = LazyBuilder()
    for day_td in document.find_all('td', text=day_regex):
        date = day_regex.search(day_td.string).group('date')
        table = None
        for element in day_td.parents:
            if element.name == 'table':
                table = element
                break
        if not table:
            continue
        for tr in table.tbody.find_all('tr'):
            if 'geschlossen' in tr.text or 'Feiertage' in tr.text:
                match = day_range_regex.search(tr.text)
                if not match:
                    canteen.setDayClosed(date)
                else:
                    fromDate = datetime.datetime.strptime(match.group('from'), '%d.%m.%Y')
                    toDate = datetime.datetime.strptime(match.group('to'), '%d.%m.%Y')
                    while fromDate <= toDate:
                        canteen.setDayClosed(fromDate.strftime('%Y-%m-%d'))
                        fromDate += datetime.date.resolution
                continue
            if len(tr) != 3:
                continue  # no meal
            strings = list(tr.contents[0].strings)
            name = strings[0]
            # prices:
            prices = strings[-1].split('|')
            if '-' in map(lambda v: v.strip(), prices):
                prices = {}
            # notes:
            notes = []
            for img in tr.contents[1].find_all('img'):
                notes.append(img['alt'].replace('Symbol', '').strip())
            for extra in list(set(map(lambda v: int(v), extra_regex.findall(tr.text)))):
                if extra in extraLegend:
                    notes.append(extraLegend[extra])
            canteen.addMeal(date, 'Hauptgerichte', name, notes, prices, roles if prices else None)
    return canteen.toXMLFeed()
Example #26
def parse_url(url, today=False, canteentype='Mittagsmensa', this_week='', next_week=True, legend_url=None):
    canteen = LazyBuilder()
    canteen.legendKeyFunc = lambda v: v.lower()
    if not legend_url:
        legend_url = url[:url.find('essen/') + 6] + 'lebensmittelkennzeichnung'
    legend_doc = parse(urlopen(legend_url))
    canteen.setLegendData(
        text=legend_doc.find(id='artikel').text,
        regex=r'(?P<name>(\d+|[A-Z]+))\s+=\s+(?P<value>\w+( |\t|\w)*)'
    )
    parse_week(url + this_week, canteen, canteentype)
    if not today and next_week is True:
        parse_week(url + '-kommende-woche', canteen, canteentype)
    if not today and isinstance(next_week, str):
        parse_week(url + next_week, canteen, canteentype)
    return canteen.toXMLFeed()
Example #27
def parse_url(url, today=False):
    canteen = LazyBuilder()
    document = parse(urlopen(url).read())
    for day_div in document.find_all('div', 'day') + document.find_all('article', attrs={'data-day': True}):
        # parse date; note: the year is missing from the page and must be calculated
        date_test = day_regex.search(day_div['data-day'])
        if not date_test:
            print('Error: unable to parse date')
            continue
        else:
            year = datetime.datetime.now().year
            if datetime.datetime.now().month > int(date_test.group('month')):
                year += 1  # date from next year
            date = "{}-{}-{}".format(year, date_test.group('month'), date_test.group('day'), )
        if 'nodata' in day_div.attrs.get('class', []) or 'GESCHLOSSEN' in day_div.text:
            canteen.setDayClosed(date)
            continue
        closed_candidate = False
        for meal_article in day_div.find_all('article', 'menu'):
            name = meal_article.find('div', 'title').text
            if not name:
                continue
            if 'geschlossen' in name:
                closed_candidate = True
                continue
            category = meal_article.find('div', 'desc').text
            notes = [v['title'] for v in meal_article.find_all('div', 'theicon') if v['title']]
            if meal_article.find('div', 'additive'):
                notes += [v[0] for v in extra_regex.findall(meal_article.find('div', 'additive').text)]
            price_div = meal_article.find('div', 'price')
            if price_div is None:
                canteen.addMeal(date, category, name, notes)
                continue
            prices = {}
            for v, r in (('default', 'student'), ('bed', 'employee'), ('guest', 'other')):
                price = price_regex.search(price_div['data-' + v])
                if price:
                    prices[r] = price.group('price')
                elif v == 'default':
                    prices = {}
                    break
            canteen.addMeal(date, category, name, notes, prices)
        if closed_candidate and not canteen.hasMealsFor(date):
            canteen.setDayClosed(date)
    return canteen.toXMLFeed()
Example #28
    def feed_all(self, name):
        canteen = LazyBuilder()

        date = self.__now()

        # Get this week
        while self.handler(canteen, name, date.date()):
            date += datetime.timedelta(days=1)

        # Skip over weekend
        if date.weekday() > 4:
            date += datetime.timedelta(days=7-date.weekday())

            # Get next week
            while self.handler(canteen, name, date.date()):
                date += datetime.timedelta(days=1)

        return canteen.toXMLFeed()
Example #29
def parse_url(url, today=False):
    canteen = LazyBuilder()
    day = datetime.date.today()
    emptyCount = 0
    totalCount = 0
    while emptyCount < 7 and totalCount < 32:
        if not parse_day(
                canteen, '{}&day={}&month={}&year={}&limit=25'.format(
                    url, day.day, day.month, day.year),
                day.strftime('%Y-%m-%d')):
            emptyCount += 1
        else:
            emptyCount = 0
        if today:
            break
        totalCount += 1
        day += datetime.date.resolution
    return canteen.toXMLFeed()
Example #30
def parse_url(url, today=False, canteentype='Mittagsmensa', this_week='', next_week=True, legend_url=None):
    canteen = LazyBuilder()
    canteen.legendKeyFunc = lambda v: v.lower()
    if not legend_url:
        legend_url = url[:url.find('essen/') + 6] + 'wissenswertes/lebensmittelkennzeichnung'
    legend_doc = parse(urlopen(legend_url), 'lxml').find(id='artikel')
    allergene = buildLegend(
        text=legend_doc.text.replace('\xa0', ' '),
        regex=r'(?P<name>[A-Z]+) {3,}enthält (?P<value>\w+( |\t|\w)*)'
    )
    allergene['EI'] = 'Ei'
    zusatzstoffe = buildLegend(
        text=legend_doc.text.replace('\xa0', ' '),
        regex=r'(?P<name>\d+) {3,} (enthält )?(?P<value>\w+( |\t|\w)*)'
    )
    suballergene = re.compile(r'(?P<name>[0-9A-Z]+)[^a-zA-Z]*enthält (?P<value>\w+( |\t|\w)*)')
    for tr in legend_doc.find_all('tr'):
        tds = tr.find_all('td')
        if len(tds) != 2:
            continue
        title = tds[0].find('strong')
        if title is None:
            continue
        else:
            title = title.text
        lines = tds[1].text.split('\n')
        for line in lines[1:]:
            try_allergene = suballergene.match(line)
            if try_allergene:
                allergene[try_allergene.group('name')] = try_allergene.group('value')
        text = lines[0].replace('enthält', '').strip()
        if title.isdigit():
            zusatzstoffe[title] = text
        else:
            allergene[title] = text
    parse_week(url + this_week, canteen, canteentype,
               allergene=allergene, zusatzstoffe=zusatzstoffe)
    if not today and next_week is True:
        parse_week(url + '-kommende-woche', canteen, canteentype,
                   allergene=allergene, zusatzstoffe=zusatzstoffe)
    if not today and isinstance(next_week, str):
        parse_week(url + next_week, canteen, canteentype,
                   allergene=allergene, zusatzstoffe=zusatzstoffe)
    return canteen.toXMLFeed()
Example #31
def parse_url(url, today=False):
    canteen = LazyBuilder()
    document = parse(urlopen(url).read(), 'lxml')

    for day_div in document.find_all('div', attrs={'data-day': True}):
        # parse date; note: the year is missing from the page and must be calculated
        date_test = day_regex.search(day_div['data-day'])
        if not date_test:
            print('Error: unable to parse date "{}"'.format(day_div['data-day']))
            continue
        else:
            year = datetime.datetime.now().year
            if datetime.datetime.now().month > int(date_test.group('month')):
                year += 1  # date from next year
            date = '{}-{}-{}'.format(year, date_test.group('month'), date_test.group('day'))

        closed_candidate = day_div.find('div', 'holiday') is not None

        for meal_article in day_div.find_all('article', 'menu'):
            name = meal_article.find('div', 'title').text
            if not name:
                continue

            category = meal_article.find('div', 'icon')['title']
            notes = []
            prices = {}

            additives = meal_article.find('div', 'additnr')
            if additives:
                notes += [additive.text for additive in additives.find_all('li')]
            notes += [v['title'] for v in meal_article.find_all('div', 'theicon') if v['title'] and v['title'] not in notes]

            price_div = meal_article.find('div', 'price')
            if price_div:
                for k, v in price_map.items():
                    price = price_div['data-' + k]
                    if price:
                        prices[v] = price
            canteen.addMeal(date, category, name, notes, prices)

        if closed_candidate and not canteen.hasMealsFor(date):
            canteen.setDayClosed(date)

    return canteen.toXMLFeed()
Example #32
def parse_url(url, today=False):
    global legend
    canteen = LazyBuilder()
    canteen.setLegendData(legend)
    day = datetime.date.today()
    emptyCount = 0
    totalCount = 0
    while emptyCount < 7 and totalCount < 32:
        if not parse_day(canteen, '{}&tag={}&monat={}&jahr={}'
                         .format(url, day.day, day.month, day.year),
                         day.strftime('%Y-%m-%d')):
            emptyCount += 1
        else:
            emptyCount = 0
        if today:
            break
        totalCount += 1
        day += datetime.date.resolution
    return canteen.toXMLFeed()
Example #33
def parse_url(url, today=False):
    global legend
    canteen = LazyBuilder()
    canteen.setLegendData(legend)
    day = datetime.date.today()
    emptyCount = 0
    totalCount = 0
    while emptyCount < 7 and totalCount < 32:
        if not parse_day(
                canteen, '{}&tag={}&monat={}&jahr={}'.format(
                    url, day.day, day.month, day.year),
                day.strftime('%Y-%m-%d')):
            emptyCount += 1
        else:
            emptyCount = 0
        if today:
            break
        totalCount += 1
        day += datetime.date.resolution
    return canteen.toXMLFeed()
Example #34
def parse_url(url, data_canteen, today=False):
    canteen = LazyBuilder()

    data = urlopen(url).read().decode('utf-8')
    document = parse(data, 'lxml')

    dish = document.find(class_='neo-menu-single-dishes')
    if dish is not None:
        dishes = dish.find_all(name='tr', attrs={"data-canteen": data_canteen})
    else:
        dishes = []

    side = document.find(class_='neo-menu-single-modals')
    if side is not None:
        dishes = dishes + side.find_all(name='tr', attrs={"data-canteen": data_canteen})

    for dish in dishes:
        parse_dish(dish, canteen)

    return canteen.toXMLFeed()
Example #35
def parse_url(url, data_canteen, today=False):
    canteen = LazyBuilder()

    data = urlopen(url).read().decode('utf-8')
    document = parse(data, 'lxml')

    dish = document.find(class_='neo-menu-single-dishes')
    if dish is not None:
        dishes = dish.find_all(name='tr', attrs={"data-canteen": data_canteen})
    else:
        dishes = []

    side = document.find(class_='neo-menu-single-modals')
    if side is not None:
        dishes = dishes + side.find_all(name='tr',
                                        attrs={"data-canteen": data_canteen})

    for dish in dishes:
        parse_dish(dish, canteen)

    return canteen.toXMLFeed()
Example #36
def parse_url(url, today=False):
    canteen = LazyBuilder()

    canteen.extra_regex = re.compile(r'\((?P<extra>[0-9a-zA-Z]{1,3}'
                                     r'(?:,[0-9a-zA-Z]{1,3})*)\)', re.UNICODE)

    legend_url = 'https://www.stwdo.de/mensa-co/allgemein/zusatzstoffe/'
    legend = parse_legend(legend_url)
    canteen.setLegendData(legend)

    day = datetime.date.today()
    week = getWeekdays(day)

    for wDay in week:
        py = {'tx_pamensa_mensa[date]' : wDay}
        payload = urlencode(py).encode('ascii')
        data = rq.urlopen(url, payload).read().decode('utf-8')
        soup = BeautifulSoup(data, 'html.parser')
        parse_day(canteen, soup, wDay)

    return canteen.toXMLFeed()
Example #37
def parse_url(url, today=False):
    canteen = LazyBuilder()

    canteen.extra_regex = re.compile(
        r'\((?P<extra>[0-9a-zA-Z]{1,3}'
        r'(?:,[0-9a-zA-Z]{1,3})*)\)', re.UNICODE)

    legend_url = 'https://www.stwdo.de/mensa-co/allgemein/zusatzstoffe/'
    legend = parse_legend(legend_url)
    canteen.setLegendData(legend)

    day = datetime.date.today()
    week = getWeekdays(day)

    for wDay in week:
        py = {'tx_pamensa_mensa[date]': wDay}
        payload = urlencode(py).encode('ascii')
        data = rq.urlopen(url, payload).read().decode('utf-8')
        soup = BeautifulSoup(data, 'html.parser')
        parse_day(canteen, soup, wDay)

    return canteen.toXMLFeed()
Example #38
def render_meta(canteen, menu_feed_url):
    """Render a OpenMensa XML meta feed for a given canteen.

    :param Canteen canteen: the canteen
    :param menu_feed_url: the canteen menu URL
    :return: the XML meta feed as string
    """
    builder = LazyBuilder()

    builder.name = canteen.name
    builder.address = canteen.street
    builder.city = canteen.city

    builder.define(name='full',
                   priority='0',
                   url=menu_feed_url,
                   source=None,
                   dayOfWeek='*',
                   dayOfMonth='*',
                   hour='8-18',
                   minute='0',
                   retry='30 1')

    return builder.toXMLFeed()
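A usage sketch for render_meta, assuming only that the canteen object exposes the three attributes read above (all values invented):

from types import SimpleNamespace

canteen = SimpleNamespace(name='Mensa Beispiel',
                          street='Beispielstraße 1',
                          city='Beispielstadt')
xml = render_meta(canteen, 'https://example.org/feeds/mensa/full.xml')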
Example #39
def parsePlan(url, internalMensaId, today):
    canteen = LazyBuilder()
    while url is not None:
        dom = BeautifulSoup(urlopen(url).read(), 'lxml')
        date = dom.select('#mensa_date > p')[0].contents[0]
        menuDefinition = dom.find(id=internalMensaId)
        menuDescription = menuDefinition.parent.find('dd')
        tables = menuDescription.select('table')
        legend = {}
        legend = buildLegend(
            legend,
            str(dom),
            regex=r'<strong>(?P<name>\w+)\s*</strong>\s*-\s*(?P<value>[\w\s)(]+)'
        )
        if tables is not None and len(tables) == 1:
            table = tables[0]
            rows = table.find_all('tr')
            for row in rows:
                menuNameElement = row.select('td[class="mensa_col_55"] > b')
                if menuNameElement and menuNameElement[0].contents:
                    menuName = menuNameElement[0].contents[0]
                    category = 'Gericht'

                    # get notes
                    notes = []
                    notesElement = row.select(
                        'td[class="mensa_col_55"] > span')
                    if notesElement and notesElement[0].text is not None:
                        notes = [
                            legend.get(n, n)
                            for n in notesElement[0].text.split(' ') if n
                        ]

                    # get prices
                    prices = {}
                    for td in row.select('td[class="mensa_col_15"]'):
                        priceElement = td.find('b')
                        groupElement = td.find('span')
                        if priceElement is not None and groupElement is not None and groupElement.contents and priceElement.contents:
                            group = str(groupElement.contents[0])
                            price = str(priceElement.contents[0])
                            if group == 'Stud.:':
                                prices['student'] = price
                            elif group == 'Bed.:':
                                prices['employee'] = price
                            elif group == 'Gast:':
                                prices['other'] = price

                    canteen.addMeal(date, category, menuName, notes, prices)
        else:
            canteen.setDayClosed(date)

        # check for further pages
        nextPageLink = dom.find(id='next_day_link')
        if nextPageLink is None or today:
            url = None
        else:
            url = 'https://www.studentenwerk-rostock.de/' + nextPageLink['href']
    return canteen.toXMLFeed()
Example #40
def parse_url(url, today=False):
    canteen = LazyBuilder()
    parse_week(url, date.today(), canteen)
    if not today:
        parse_week(url, date.today() + date.resolution * 7, canteen)
    return canteen.toXMLFeed()
Example #41
def parse_url(url, today=False):
    content = urlopen(url).read()
    document = parse(content, "lxml")
    canteen = LazyBuilder()
    table = document.find_all('table')[0]

    def debug_print(food_type, food_description, pricing):
        if pricing is None:
            print(date + ': ' + food_type + ': ' + food_description)
        else:
            print(date + ': ' + food_type + ': ' + food_description + ' : ',
                  end='')
            for e in pricing:
                print(e, end=' ')
            print()

    def is_new_entry(tds):
        td = tds[0]
        return td.string is not None and date_regex.search(
            td.string) is not None

    def is_end_of_entry(tds):
        for td in tds:
            if (td.string is None or td.string.strip() != ''):
                return False
        return True

    def is_action_entry(td):
        return td.text == 'Aktion'

    def is_closed(tds):
        return is_new_entry(tds) and get_pricing(tds, 4, 7) is None

    def refactor_date(raw_date):
        now = datetime.datetime.now()
        day = date_regex.search(raw_date).group('day')
        month = date_regex.search(raw_date).group('month')
        year = now.year
        if month == '01' and now.month == 12:
            # if the list depicts meals from this and the next year
            year += 1
        elif month == '12' and now.month == 1:
            # if the list depicts meals from this and the last year
            year -= 1
        return day + '.' + month + '.' + str(year)

    def parse_food_type(td):
        food_type = ''
        if td.string is None:
            img = td.find_all('img')[0]
            src = img.get('src')
            if 'msc' in src:
                food_type += 'Fish MSC '
            elif 'vegan' in src:
                food_type += 'Vegan '
        # Sometimes uncategorized food is possible, therefore we need to cover
        # this, otherwise openmensa.org will fail due to an empty tag.
        elif td.string.strip() == '':
            food_type += 'Tipp '
        else:
            if 'R' in td.string:
                food_type += 'Rind '
            if 'S' in td.string:
                food_type += 'Schwein '
            if 'G' in td.string:
                food_type += 'Geflügel '
            if 'V' in td.string:
                food_type += 'Vegetarisch '
            if 'F' in td.string:
                food_type += 'Fisch '
            if 'L' in td.string:
                food_type += 'Lamm '
            if 'W' in td.string:
                food_type += 'Wild '
        return food_type.rstrip()

    def get_refs(td):
        return td.find_all('sup')

    def get_food_description(td):
        refl = get_refs(td)
        description = td.text
        for ref in refl:
            description = description.replace(' ' + ref.text, '', 1)
        if description[0] == ' ':
            description = description.replace(' ', '', 1)
        return description

    def get_notes(td):
        refl = get_refs(td)
        strl = []
        for ref in refl:
            strl.extend(ref.string.split(','))
        strl = list(set(strl))
        return strl

    def build_notes_string(td):
        refs = get_notes(td)
        food_is = ''
        food_contains = ''
        for r in refs:
            # parse food is footnotes
            if r == '1':
                food_is += 'mit Farbstoffen, '
            elif r == '4':
                food_is += 'geschwärzt, '
            elif r == '7':
                food_is += 'mit Antioxidationsmittel, '
            elif r == '8':
                food_is += 'mit Geschmacksverstärker, '
            elif r == '9':
                food_is += 'geschwefelt, '
            elif r == '10':
                food_is += 'geschwärzt, '
            elif r == '11':
                food_is += 'gewachst, '
            elif r == '12':
                food_is += 'mit Phosphat, '
            elif r == '5':
                food_is += 'mit Süßungsmittel, '
            # parse allergic footnotes
            elif r == 'a1':
                food_contains += 'Gluten, '
            elif r == 'a2':
                food_contains += 'Krebstiere, '
            elif r == 'a3':
                food_contains += 'Eier, '
            elif r == 'a4':
                food_contains += 'Fisch, '
            elif r == 'a5':
                food_contains += 'Erdnüsse, '
            elif r == 'a6':
                food_contains += 'Soja, '
            elif r == 'a7':
                food_contains += 'Milch/Laktose, '
            elif r == 'a8':
                food_contains += 'Schalenfrüchte, '
            elif r == 'a9':
                food_contains += 'Sellerie, '
            elif r == 'a10':
                food_contains += 'Senf, '
            elif r == 'a11':
                food_contains += 'Sesam, '
            elif r == 'a12':
                food_contains += 'Schwefeldioxid/Sulfite, '
            elif r == 'a13':
                food_contains += 'Lupinen, '
            elif r == 'a14':
                food_contains += 'Weichtiere, '
            else:
                food_contains += 'undefinierte Chemikalien:' + r + ', '
        notes = ''
        if food_is != '':
            notes += 'Gericht ist ' + food_is
        if food_contains != '':
            if food_is == '':
                notes += 'Gericht enthält '
            else:
                notes += 'und enthält '
            notes += food_contains
        if notes != '':
            nl = list(notes)
            del nl[len(nl) - 1]
            nl[len(nl) - 1] = '.'
            notes = ''.join(nl)
        return notes

    def get_pricing(tds, f, t):
        pricing = []
        # sometimes we don't get 7 elements; then this might be a special day
        if len(tds) < 7:
            return None
        for i in range(f, t):
            raw_price = tds[i].string.strip()
            if raw_price == '':
                return None
            else:
                pricing.append(price_regex.search(raw_price).group('val'))
        return pricing

    # state helper
    inside_valide_entry = False
    date = ''

    for tr in table.find_all('tr'):
        tds = tr.find_all('td')
        if is_new_entry(tds):
            try:
                raw_date = tds[0].string
                date = refactor_date(raw_date)
                if is_closed(tds):
                    # sometimes a canteen might look closed but actually it's Spargeltage
                    if "Spargeltage" in tds[3].text:
                        canteen.addMeal(date, "Spargel", "Spargel Tag",
                                        ["Spargel und andere Gerichte."], None,
                                        None)
                    else:
                        canteen.setDayClosed(date)
                else:
                    inside_valide_entry = True
            except Exception as e:
                traceback.print_exception(*sys.exc_info())
        if is_end_of_entry(tds):
            inside_valide_entry = False
        elif inside_valide_entry:
            try:
                notes = []
                if is_action_entry(tds[0]):
                    food_type = parse_food_type(tds[1])
                    food_description = get_food_description(tds[2])
                    notes_string = build_notes_string(tds[2])
                    if (notes_string != ""):
                        notes.append(notes_string)
                    prices = get_pricing(tds, 3, 6)
                    canteen.addMeal(date, 'Aktion: ' + food_type,
                                    food_description, notes, prices,
                                    roles if prices else None)
                else:
                    food_type = parse_food_type(tds[2])
                    food_description = get_food_description(tds[3])
                    notes_string = build_notes_string(tds[3])
                    if (notes_string != ""):
                        notes.append(notes_string)
                    prices = get_pricing(tds, 4, 7)
                    canteen.addMeal(date, food_type, food_description, notes,
                                    prices, roles if prices else None)
            except Exception as e:
                traceback.print_exception(*sys.exc_info())

    return canteen.toXMLFeed()
Example #42
def parse_url(url, today):
    canteen = LazyBuilder()
    if not today:
        parse_week(url, canteen)
    return canteen.toXMLFeed()
Example #43
def parse_url(url, today=False):
    canteen = LazyBuilder()
    legend = {
        '1': 'mit Farbstoff',
        '2': 'mit Konservierungsstoff',
        '3': 'mit Antioxidationsmittel',
        '4': 'mit Geschmacksverstärker',
        '5': 'geschwefelt',
        '6': 'geschwärzt',
        '7': 'gewachst',
        '8': 'mit Phosphat',
        '9': 'mit Süssungsmittel Saccharin',
        '10': 'mit Süssungsmittel Aspartam, enth. Phenylalaninquelle',
        '11': 'mit Süssungsmittel Cyclamat',
        '12': 'mit Süssungsmittel Acesulfam',
        '13': 'chininhaltig',
        '14': 'coffeinhaltig',
        '15': 'gentechnisch verändert',
        '16': 'enthält Sulfite',
        '17': 'enthält Phenylalanin',
        'A': 'Gluten',
        'AA': 'Weizen',
        'AB': 'Roggen',
        'AC': 'Gerste',
        'AD': 'Hafer',
        'AE': 'Dinkel',
        'AF': 'Kamut',
        'B': 'Krebstiere',
        'C': 'Eier',
        'D': 'Fisch',
        'E': 'Erdnüsse',
        'F': 'Soja',
        'G': 'Milch und Milchprodukte',
        'H': 'Schalenfrüchte',
        'HA': 'Mandel',
        'HB': 'Haselnuss',
        'HC': 'Walnuss',
        'HD': 'Cashew',
        'HE': 'Pecannuss',
        'HF': 'Paranuss',
        'HG': 'Pistazie',
        'HH': 'Macadamianuss',
        'HI': 'Queenslandnuss',
        'I': 'Sellerie',
        'J': 'Senf',
        'K': 'Sesamsamen',
        'L': 'Schwefeldioxid und Sulfite',
        'M': 'Lupinen',
        'N': 'Weichtiere',
        'O': 'Nitrat',
        'P': 'Nitritpökelsalz',
        'ZTA': 'Alkohol',
        'ZTB': 'mit ausschließlich biologisch erzeugten Rohstoffen',
        'ZTF': 'Fisch',
        'ZTG': 'Geflügel',
        'ZTL': 'Lamm',
        'ZTMSC': 'zertifizierte nachhaltige Fischerei (MSC-C-53400)',
        'ZTMV': 'Mensa Vital',
        'ZTR': 'Rindfleisch',
        'ZTS': 'Schweinefleisch',
        'ZTV': 'vegetarisch',
        'ZTVG': 'vegan',
        'ZTW': 'Wild'
    }

    # Create regular expressions for categories
    hg = re.compile("^HG[1-9]$")
    b = re.compile("^B[1-9]$")
    n = re.compile("^N[1-9]$")

    # Get two weeks for full.xml and only the current one for today.xml.
    # On error 404, continue with the next isoweek.
    # Returns an empty feed if both isoweeks result in error 404.
    # At most locations the data doesn't exist during the term break.
    weeks = 1 if today else 2
    for w in range(0, weeks):
        kw = (date.today() + timedelta(weeks=w)).isocalendar()[1]
        try:
            f = urlopen('%(location)s/%(isoweek)d.csv' % {
                'location': url,
                'isoweek': kw
            })
        except HTTPError as e:
            if e.code == 404:
                continue
            else:
                raise e

        # Decode data from ISO charset
        f = f.read().decode('iso8859-1')

        # Set roles for prices
        roles = ('student', 'employee', 'other')

        # Read csv data and skip the csv header
        mealreader = reader(f.splitlines(), delimiter=';')
        next(mealreader)
        for row in mealreader:
            mdate = row[0]
            category = row[2]
            mname = row[3]
            mtype = row[4]
            prices = [row[6], row[7], row[8]]

            # determine category for the current meal
            if category == 'Suppe':
                pass
            elif hg.match(category):
                category = 'Hauptgerichte'
            elif b.match(category):
                category = 'Beilagen'
            elif n.match(category):
                category = 'Nachspeisen'
            else:
                raise RuntimeError('Unknown category: ' + str(category))

            # Extract the notes from brackets in the meal name
            # Remove the brackets, notes and improve readability
            notes = []
            bpos = mname.find(')')
            while bpos != -1:
                apos = mname.find('(')
                # Extract notes from current brackets and avoid empty notes
                for i in mname[apos + 1:bpos].split(','):
                    if i:
                        notes.append(i)
                # Check if brackets are at the end of the meal name
                if bpos == len(mname) - 1:
                    # Remove brackets and break bracket loop
                    mname = mname[:apos]
                    bpos = -1
                else:
                    # Remove current brackets, improve readability
                    # and find the next brackets
                    mname = mname[:apos].rstrip(
                    ) + ' und ' + mname[bpos + 1:].lstrip()
                    bpos = mname.find(')')

            # Remove trailing whitespaces in the meal name
            mname = mname.rstrip()

            # Add meal type notes to notes list and avoid empty notes
            for i in mtype.split(','):
                if i:
                    notes.append('ZT' + i)

            # Translate notes via legend to human readable information
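            # legend.get(i, legend.get(i[2:], i)) tries the full code first,
            # then retries with the 'ZT' prefix stripped for type codes that
            # have no dedicated legend entry; unknown codes pass through
            # unchanged.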
            mnotes = []
            for i in notes:
                mnotes.append(legend.get(i, legend.get(i[2:], i)))

            # Try to add the meal
            try:
                canteen.addMeal(mdate, category, mname, mnotes, prices, roles)
            except ValueError as e:
                print('could not add meal {}/{} "{}" due to "{}"'.format(
                    mdate, category, mname, e),
                      file=sys.stderr)
                # empty meal ...
                pass

    # return xml data
    return canteen.toXMLFeed()
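
# A minimal standalone sketch of the bracket-extraction loop used above, for
# exercising it in isolation (extract_notes is a hypothetical helper, not part
# of the original parser):
def extract_notes(mname):
    """Split a meal name like 'Gulasch (1,A) mit Reis (C)' into a cleaned
    name and the list of note codes found in parentheses."""
    notes = []
    bpos = mname.find(')')
    while bpos != -1:
        apos = mname.find('(')
        # collect the codes from the current brackets, skipping empty ones
        notes += [i for i in mname[apos + 1:bpos].split(',') if i]
        if bpos == len(mname) - 1:
            mname = mname[:apos]
            bpos = -1
        else:
            mname = mname[:apos].rstrip() + ' und ' + mname[bpos + 1:].lstrip()
            bpos = mname.find(')')
    return mname.rstrip(), notes

# extract_notes('Gulasch (1,A) mit Reis (C)')
# -> ('Gulasch und mit Reis', ['1', 'A', 'C'])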
def parse_url(url, today=False):
    canteen = LazyBuilder()

    # prices are stored on a separate page
    document = parse(urlopen(base + '/mensa-preise/').read(), 'lxml')
    prices = {}
    for tr in document.find('div', 'ce-bodytext').find_all('tr'):
        meal = tr.find('th')
        if not meal or not meal.text.strip():
            continue
        if len(tr.find_all('td', 'betrag')) < 3:
            continue
        if 'titel' in meal.attrs.get(
                'class', []) or 'zeilentitel' in meal.attrs.get('class', []):
            continue
        meal = meal.text.strip()
        prices[meal] = {}
        for role, _id in [('student', 0), ('employee', 1), ('other', 2)]:
            price_html = tr.find_all('td', 'betrag')[_id].text
            price_search = price_regex.search(price_html)
            if price_search:
                prices[meal][role] = price_search.group('price')

    errorCount = 0
    date = datetime.date.today()
    while errorCount < 7:
        try:
            document = parse(urlopen(url.format(date)).read(), 'lxml')
            errorCount = 0
        except HTTPError as e:
            if e.code == 404:
                errorCount += 1
                date += datetime.timedelta(days=1)
                continue
            else:
                raise e

        # extract legend
        legend = {}
        legends = document.find('div', 'tx-stwm-speiseplan')
        additions = legends.find('div', 'c-schedule__filter-body')
        for table in additions.find_all('div', 'c-schedule__filter-item'):
            for ingredient in table.find('ul').find_all('li'):
                name = ingredient.find('dt').text.strip()
                description = ingredient.find('dd').text.strip()
                legend[name] = description
        for label in legends.find('ul',
                                  'c-schedule__type-list').find_all('li'):
            name = label.find('dt').text.replace('(', '').replace(')',
                                                                  '').strip()
            description = label.find('dd').text.strip()
            legend[name] = description

        # extract meals
        mensa_data = document.find('ul', 'c-schedule__list')
        category = None
        for meal in mensa_data.find_all('li'):
            # update category or use previous one if not specified
            category_text = meal.find('dt', 'c-schedule__term').text.strip()
            if category_text:
                category = category_text

            data = meal.find('dd').find('p', 'js-schedule-dish-description')
            name = data.contents[0].strip()  # name is the first text node
            if not name:
                continue

            # notes are contained in 3 boxes (type, additional, allergen) and
            # are comma-separated lists enclosed in brackets or parentheses
            notes = []
            for note in meal.find_all('span', 'c-schedule__marker'):
                note_text = note.find('span', 'u-text-sup').text \
                    .replace('(', '').replace(')', '') \
                    .replace('[', '').replace(']', '')
                notes += [n for n in note_text.split(',') if n]

            # some meals contain the GQB label in their name (instead of in notes)
            if '(GQB)' in name:
                name = name.replace('(GQB)', '').strip()
                notes.append('GQB')

            # the price for both meals is specified as Bio-/Aktionsgericht
            price_category = category \
                .replace('Aktionsessen', 'Bio-/Aktionsgericht') \
                .replace('Biogericht', 'Bio-/Aktionsgericht') \
                .strip()

            canteen.addMeal(date, category, name,
                            [legend.get(n, n) for n in notes],
                            prices.get(price_category, {}))

        date += datetime.timedelta(days=1)
        if today:
            break

    return canteen.toXMLFeed()
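
# base and price_regex are referenced above but defined outside this snippet;
# plausible definitions (assumptions, not taken from the original source):
import re
base = 'https://www.studentenwerk-muenchen.de'  # hypothetical base URL
price_regex = re.compile(r'(?P<price>\d+,\d{2})')  # matches e.g. '3,20'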
def main(url='https://www.stw-bremen.de/de/essen-trinken/mensa-nw-1',
         out='xml'):

    # TODO: replace ids with a findall food-plan-* wildcard
    data = {}  # dict to store parsed data
    today = dt.date.today()

    s = requests.session()
    r = s.get(url)  # get request from stw server
    html = r.content  # the raw html code of the returned page
    soup = BeautifulSoup(html, 'html.parser')  # source code parser

    canteen = LazyBuilder()

    days = soup.find_all(id=re.compile("^food-plan-"))
    for html_day in days:  # one container element per day
        date_id = html_day['id']  # e.g. 'food-plan-3'
        workday_offset = int(date_id.split('-')[-1])
        date = get_date_from_id(workday_offset)
        date_str = dt.datetime.strftime(date, '%Y-%m-%d')
        data[date_str] = {}  # init dict for each id
        # The information for each meal is stored in a separate table with
        # class food-category; loop over all such tables instead of
        # hardcoding the categories
        html_meals = html_day.find_all("table", "food-category")
        for meal in html_meals:
            # meal is still a html code string
            category_name = meal.find('th', 'category-name').string
            meal_text = ''
            # since there are added line breaks and <sup> tags, I use the strings
            # generator instead of the get_text() or .text methods
            meal_parts = meal.find('td',
                                   'field-name-field-description').strings
            for m in meal_parts:  # m is an iterable part of the html contents
                if not m.parent.name == 'sup':
                    meal_text += str(m)
            # normalize line breaks and the '* * *' course separator
            meal_text = meal_text.replace('\r', '')
            meal_text = meal_text.replace('\n', ' ')
            meal_text = meal_text.replace('* * *', '; ')
            meal_price_a = meal.find('td',
                                     'field-name-field-price-students').text
            meal_price_b = meal.find('td',
                                     'field-name-field-price-employees').text

            m = {}
            m['text'] = meal_text
            m['A'] = meal_price_a
            m['B'] = meal_price_b
            data[date_str][category_name] = m

            # also feed the meal into the OpenMensa LazyBuilder
            canteen.addMeal(date,
                            category_name,
                            meal_text,
                            prices={
                                'student': meal_price_a,
                                'employee': meal_price_b
                            })
    om = canteen.toXMLFeed()

    # json.dump (without the trailing s) would write to a file instead
    j = json.dumps(data, ensure_ascii=False)

    if out == 'xml':
        return om
    elif out == 'json':
        return j
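
# get_date_from_id is referenced above but not included in this snippet; a
# plausible implementation (an assumption -- given the name workday_offset,
# the real one may instead count workdays and skip weekends):
def get_date_from_id(workday_offset):
    return dt.date.today() + dt.timedelta(days=workday_offset)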
Exemple #46
def parse_url(url, today=False):
    today = datetime.date.today()
    if today.weekday() == 6:  # Sunday
        today += datetime.timedelta(days=1)  # Tomorrow

    if "%s" in url:
        url = url % today.strftime('%Y_%m_%d')

    try:
        content = requests.get(url).text
    except requests.exceptions.ConnectionError as e:
        logging.warning(str(e))
        content = requests.get(url, verify=False).text

    document = BeautifulSoup(content, "html.parser")
    canteen = LazyBuilder()

    # Prices for employees and guests
    try:
        p = price_employee_regex.search(document.find("main").text).groupdict()
        employee = float(p["employee"].split(",")[0]) + \
            float(p["employee"].split(",")[1]) / 100

        p = price_guest_regex.search(document.find("main").text).groupdict()
        # assumes the guest regex captures its price in a group named "guest"
        guest = float(p["guest"].split(",")[0]) + \
            float(p["guest"].split(",")[1]) / 100
    except (AttributeError, TypeError, KeyError, ValueError):
        employee_multiplier = 1.25
        guest_multiplier = 1.60
        employee = None
        guest = None

    # Date
    p = datespan_regex.search(document.find(
        "div", {"class": "maincontent"}).find("h2").text).groupdict()

    if len(p["from"].split(".")[2]) == 0:
        p["from"] += p["to"].split(".")[2]
    fromdate = datetime.datetime.strptime(p["from"], "%d.%m.%Y")

    maincontent = document.find("div", {"class": "maincontent"})
    table = maincontent.find("table")
    if not table:
        if maincontent:
            # Die Speisenausgabe DHBW Eppelheim ist vom dd.mm.yyyy – dd.mm.yyyy
            # geschlossen
            p = datespan_regex.search(maincontent.text)
            if p:
                fromdate = datetime.datetime.strptime(p["from"], "%d.%m.%Y")
                todate = datetime.datetime.strptime(p["to"], "%d.%m.%Y")
                while fromdate <= todate:
                    canteen.setDayClosed(fromdate.strftime('%d.%m.%Y'))
                    fromdate += datetime.timedelta(1)

        return canteen.toXMLFeed()

    trs = table.find_all("tr")

    # roles matching the three prices collected below; roles is used further
    # down but not defined elsewhere in this snippet
    roles = ('student', 'employee', 'other')
    date = None
    for tr in trs:

        tds = tr.find_all("td")

        if len(tds) == 4:
            td0, td1, td2, td3 = tds

            day = td0.text.strip()

            date = fromdate + datetime.timedelta(days=daysGerman.index(day))
            date = date.strftime('%d.%m.%Y')

        else:
            td0 = None
            td1, td2, td3 = tds

        notes = []

        if "feiertag" in td1.text.lower() or "geschlossen" in td1.text.lower():
            canteen.setDayClosed(date)
            continue

        categoryName = td1.text.strip()[:-1]
        mealName = td2.text.strip()

        if not categoryName or not mealName:
            continue

        prices = []
        try:
            price = float(euro_regex.search(
                td3.text).group(1).replace(",", "."))
            prices.append(price)
            if employee is not None:
                prices.append(employee)
            else:
                prices.append(price * employee_multiplier)
            if guest is not None:
                prices.append(guest)
            else:
                prices.append(price * guest_multiplier)
        except (AttributeError, TypeError, KeyError, ValueError):
            notes.append(td3.text.strip())

        notes = [x for x in notes if x]
        canteen.addMeal(date, categoryName, mealName, notes if notes else None,
                        prices if prices else None, roles if prices else None)

    return canteen.toXMLFeed()
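
# The regexes and day list used above are defined outside this snippet;
# plausible definitions (assumptions) consistent with their usage:
import re
datespan_regex = re.compile(
    r'(?P<from>\d{2}\.\d{2}\.(?:\d{4})?)\s*[–-]\s*(?P<to>\d{2}\.\d{2}\.\d{4})')
euro_regex = re.compile(r'(\d+,\d{2})\s*€?')
price_employee_regex = re.compile(
    r'Bedienstete.*?(?P<employee>\d+,\d{2})', re.DOTALL)
price_guest_regex = re.compile(r'Gäste.*?(?P<guest>\d+,\d{2})', re.DOTALL)
daysGerman = ['Montag', 'Dienstag', 'Mittwoch', 'Donnerstag', 'Freitag',
              'Samstag', 'Sonntag']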
def parse_url(url, today=False):
    canteen = LazyBuilder()
    legend = {
        '1':     'mit Farbstoff',
        '2':     'mit Konservierungsstoff',
        '3':     'mit Antioxidationsmittel',
        '4':     'mit Geschmacksverstärker',
        '5':     'geschwefelt',
        '6':     'geschwärzt',
        '7':     'gewachst',
        '8':     'mit Phosphat',
        '9':     'mit Süssungsmittel Saccharin',
        '10':    'mit Süssungsmittel Aspartam, enth. Phenylalaninquelle',
        '11':    'mit Süssungsmittel Cyclamat',
        '12':    'mit Süssungsmittel Acesulfam',
        '13':    'chininhaltig',
        '14':    'coffeinhaltig',
        '15':    'gentechnisch verändert',
        '16':    'enthält Sulfite',
        '17':    'enthält Phenylalanin',
        'A':     'Gluten',
        'B':     'Krebstiere',
        'C':     'Eier',
        'D':     'Fisch',
        'E':     'Erdnüsse',
        'F':     'Soja',
        'G':     'Milch und Milchprodukte',
        'H':     'Schalenfrüchte',
        'I':     'Sellerie',
        'J':     'Senf',
        'K':     'Sesamsamen',
        'L':     'Schwefeldioxid und Sulfite',
        'M':     'Lupinen',
        'N':     'Weichtiere',
        'ZTA':   'Aktionsgericht',
        'ZTB':   'mit ausschließlich biologisch erzeugten Rohstoffen',
        'ZTF':   'Fisch',
        'ZTG':   'Geflügel',
        'ZTL':   'Lamm',
        'ZTMSC': 'zertifizierte nachhaltige Fischerei (MSC-C-53400)',
        'ZTMV':  'Mensa Vital',
        'ZTR':   'Rindfleisch',
        'ZTS':   'Schweinefleisch',
        'ZTV':   'vegetarisch',
        'ZTVG':  'vegan',
        'ZTW':   'Wild'
    }
    #canteen.setLegendData(legend)

    hg = re.compile("^HG[1-9]$")
    b = re.compile("^B[1-9]$")
    n = re.compile("^N[1-9]$")

    #for w in 0, 1:
    for w in [0]:
        kw = (date.today() + timedelta(weeks=w)).isocalendar()[1]
        try:
            f = urlopen('%(location)s/%(isoweek)d.csv' %
                        {'location': url, 'isoweek': kw})
        except HTTPError as e:
            if e.code == 404:
                continue
            else:
                raise e
        f = f.read().decode('iso8859-1')

        roles = ('student', 'employee', 'other')

        initline = True
        mealreader = reader(f.splitlines(), delimiter=';')
        for row in mealreader:
            if initline:
                initline = False
            else:
                if row[2] == 'Suppe':
                    category = 'Suppe'
                elif hg.match(row[2]):
                    category = 'Hauptgerichte'
                elif b.match(row[2]):
                    category = 'Beilagen'
                elif n.match(row[2]):
                    category = 'Nachspeisen'
                else:
                    raise RuntimeError('Unknown category: ' + str(row[2]))

                mdate = row[0]
                notes = []

                mname = row[3]
                bpos = mname.find(')')
                while bpos != -1:
                    apos = mname.find('(')
                    for i in mname[apos+1:bpos].split(','):
                        notes.append(i)
                    if bpos == len(mname)-1:
                        mname = mname[:apos] + mname[bpos+1:]
                        bpos = -1
                    else:
                        mname = mname[:apos] + ' und ' + mname[bpos+1:]
                        bpos = mname.find(')')
                if mname.rfind(' ') == len(mname)-1:
                    mname = mname[:len(mname)-1]

                mtype = row[4]
                if mtype != '':
                    for i in mtype.split(','):
                        notes.append('ZT' + i)

                prices = [row[6], row[7], row[8]]

                mnotes = []
                for i in notes:
                    mnotes.append(legend.get(i, legend.get(i[2:], i)))

                try:
                    canteen.addMeal(mdate, category, mname,
                                    mnotes, prices, roles)
                except ValueError as e:
                    print('could not add meal {}/{} "{}" due to "{}"'.format(mdate, category, mname, e), file=sys.stderr)
                    # empty meal ...
                    pass

    return canteen.toXMLFeed()
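
# Column layout of the CSV consumed above, inferred from the row indices used
# (an assumption; the feed itself is not documented in this snippet):
# row[0] date, row[2] category code (Suppe / HG1-9 / B1-9 / N1-9),
# row[3] meal name, row[4] comma-separated type codes,
# row[6:9] prices for student / employee / other.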
def parse_url(url, today=False):
    canteen = LazyBuilder()
    legend = {
        '1': 'mit Farbstoff',
        '2': 'mit Konservierungsstoff',
        '3': 'mit Antioxidationsmittel',
        '4': 'mit Geschmacksverstärker',
        '5': 'geschwefelt',
        '6': 'geschwärzt',
        '7': 'gewachst',
        '8': 'mit Phosphat',
        '9': 'mit Süssungsmittel Saccharin',
        '10': 'mit Süssungsmittel Aspartam, enth. Phenylalaninquelle',
        '11': 'mit Süssungsmittel Cyclamat',
        '12': 'mit Süssungsmittel Acesulfam',
        '13': 'chininhaltig',
        '14': 'coffeinhaltig',
        '15': 'gentechnisch verändert',
        '16': 'enthält Sulfite',
        '17': 'enthält Phenylalanin',
        'A': 'Gluten',
        'B': 'Krebstiere',
        'C': 'Eier',
        'D': 'Fisch',
        'E': 'Erdnüsse',
        'F': 'Soja',
        'G': 'Milch und Milchprodukte',
        'H': 'Schalenfrüchte',
        'I': 'Sellerie',
        'J': 'Senf',
        'K': 'Sesamsamen',
        'L': 'Schwefeldioxid und Sulfite',
        'M': 'Lupinen',
        'N': 'Weichtiere',
        'ZTA': 'Aktionsgericht',
        'ZTB': 'mit ausschließlich biologisch erzeugten Rohstoffen',
        'ZTF': 'Fisch',
        'ZTG': 'Geflügel',
        'ZTL': 'Lamm',
        'ZTMSC': 'zertifizierte nachhaltige Fischerei (MSC-C-53400)',
        'ZTMV': 'Mensa Vital',
        'ZTR': 'Rindfleisch',
        'ZTS': 'Schweinefleisch',
        'ZTV': 'vegetarisch',
        'ZTVG': 'vegan',
        'ZTW': 'Wild'
    }
    #canteen.setLegendData(legend)

    hg = re.compile("^HG[1-9]$")
    b = re.compile("^B[1-9]$")
    n = re.compile("^N[1-9]$")

    #for w in 0, 1:
    for w in [0]:
        kw = (date.today() + timedelta(weeks=w)).isocalendar()[1]
        try:
            f = urlopen('%(location)s/%(isoweek)d.csv' % {
                'location': url,
                'isoweek': kw
            })
        except HTTPError as e:
            if e.code == 404:
                continue
            else:
                raise e
        f = f.read().decode('iso8859-1')

        roles = ('student', 'employee', 'other')

        initline = True
        mealreader = reader(f.splitlines(), delimiter=';')
        for row in mealreader:
            if initline:
                initline = False
            else:
                if row[2] == 'Suppe':
                    category = 'Suppe'
                elif hg.match(row[2]):
                    category = 'Hauptgerichte'
                elif b.match(row[2]):
                    category = 'Beilagen'
                elif n.match(row[2]):
                    category = 'Nachspeisen'
                else:
                    raise RuntimeError('Unknown category: ' + str(row[2]))

                mdate = row[0]
                notes = []

                mname = row[3]
                bpos = mname.find(')')
                while bpos != -1:
                    apos = mname.find('(')
                    for i in mname[apos + 1:bpos].split(','):
                        notes.append(i)
                    if bpos == len(mname) - 1:
                        mname = mname[:apos] + mname[bpos + 1:]
                        bpos = -1
                    else:
                        mname = mname[:apos] + ' und ' + mname[bpos + 1:]
                        bpos = mname.find(')')
                if mname.rfind(' ') == len(mname) - 1:
                    mname = mname[:len(mname) - 1]

                mtype = row[4]
                if mtype != '':
                    for i in mtype.split(','):
                        notes.append('ZT' + i)

                prices = [row[6], row[7], row[8]]

                mnotes = []
                for i in notes:
                    mnotes.append(legend.get(i, legend.get(i[2:], i)))

                try:
                    canteen.addMeal(mdate, category, mname, mnotes, prices,
                                    roles)
                except ValueError as e:
                    print('could not add meal {}/{} "{}" due to "{}"'.format(
                        mdate, category, mname, e),
                          file=sys.stderr)
                    # empty meal ...
                    pass

    return canteen.toXMLFeed()
Exemple #49
	document = parse(content, 'html.parser')

	items = document.find_all('a', {"class": "item"})

	for item in items:
		title = item.strong.string
		if not title:
			continue
		numbers = item.small.string
		notes = []
		if numbers:
			for number in numbers.split(','):
				number = int(number.strip())
				if number > len(legend):
					continue
				notes.append(legend[number])
		row = item.parent.parent
		price = row.find_all('td')[-1].string
		prices = {}
		if price:
			subprice = price.split('/')
			if len(subprice) == 3:
				prices = {'student': subprice[0], 'employee': subprice[1], 'other': subprice[2]}
			else:
				prices = {'other': price}
		canteen.addMeal(datetime.date(date.year, date.month, date.day), "Mittagessen", title, notes=notes, prices=prices)

	date = date + datetime.timedelta(1)

print(canteen.toXMLFeed())
def parse_url(url, today=False):
    canteen = LazyBuilder()
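    # declare parsed prices as student prices; the empty dict means no known
    # surcharges for the other roles (pyopenmensa's setAdditionalCharges)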
    canteen.setAdditionalCharges('student', {})
    parse_week(url, canteen)
    return canteen.toXMLFeed()
Exemple #51
def parse_url(url, today=False):
    canteen = LazyBuilder()
    parse_week(url + '&wann=2', canteen)
    if not today:
        parse_week(url + '&wann=3', canteen)
    return canteen.toXMLFeed()
def parse_url(url, today=False):
    canteen = LazyBuilder()
    legend = {
        '1':     'mit Farbstoff',
        '2':     'mit Konservierungsstoff',
        '3':     'mit Antioxidationsmittel',
        '4':     'mit Geschmacksverstärker',
        '5':     'geschwefelt',
        '6':     'geschwärzt',
        '7':     'gewachst',
        '8':     'mit Phosphat',
        '9':     'mit Süssungsmittel Saccharin',
        '10':    'mit Süssungsmittel Aspartam, enth. Phenylalaninquelle',
        '11':    'mit Süssungsmittel Cyclamat',
        '12':    'mit Süssungsmittel Acesulfam',
        '13':    'chininhaltig',
        '14':    'coffeinhaltig',
        '15':    'gentechnisch verändert',
        '16':    'enthält Sulfite',
        '17':    'enthält Phenylalanin',
        'A':     'Gluten',
        'B':     'Krebstiere',
        'C':     'Eier',
        'D':     'Fisch',
        'E':     'Erdnüsse',
        'F':     'Soja',
        'G':     'Milch und Milchprodukte',
        'H':     'Schalenfrüchte',
        'I':     'Sellerie',
        'J':     'Senf',
        'K':     'Sesamsamen',
        'L':     'Schwefeldioxid und Sulfite',
        'M':     'Lupinen',
        'N':     'Weichtiere',
        'ZTA':   'Aktionsgericht',
        'ZTB':   'mit ausschließlich biologisch erzeugten Rohstoffen',
        'ZTF':   'Fisch',
        'ZTG':   'Geflügel',
        'ZTL':   'Lamm',
        'ZTMSC': 'zertifizierte nachhaltige Fischerei (MSC-C-53400)',
        'ZTMV':  'Mensa Vital',
        'ZTR':   'Rindfleisch',
        'ZTS':   'Schweinefleisch',
        'ZTV':   'vegetarisch',
        'ZTVG':  'vegan',
        'ZTW':   'Wild'
    }

    # Create regular expressions for categories
    hg = re.compile("^HG[1-9]$")
    b = re.compile("^B[1-9]$")
    n = re.compile("^N[1-9]$")

    # Get current and next isoweek and try to get the data
    # On error 404 continue with next isoweek
    # Returns an empty feed if both isoweeks result in error 404
    # At most locations the data doesn't exist on term break
    for w in 0, 1:
        kw = (date.today() + timedelta(weeks=w)).isocalendar()[1]
        try:
            f = urlopen('%(location)s/%(isoweek)d.csv' %
                        {'location': url, 'isoweek': kw})
        except HTTPError as e:
            if e.code == 404:
                continue
            else:
                raise e

        # Decode data from ISO charset
        f = f.read().decode('iso8859-1')

        # Set roles for prices
        roles = ('student', 'employee', 'other')

        # Read csv data and skip the csv header
        mealreader = reader(f.splitlines(), delimiter=';')
        next(mealreader)
        for row in mealreader:
            mdate = row[0]
            category = row[2]
            mname = row[3]
            mtype = row[4]
            prices = [row[6], row[7], row[8]]

            # determine category for the current meal
            if category == 'Suppe':
                pass
            elif hg.match(category):
                category = 'Hauptgerichte'
            elif b.match(category):
                category = 'Beilagen'
            elif n.match(category):
                category = 'Nachspeisen'
            else:
                raise RuntimeError('Unknown category: ' + str(category))

            # Extract the notes from brackets in the meal name
            # Remove the brackets, notes and improve readability
            notes = []
            bpos = mname.find(')')
            while bpos != -1:
                apos = mname.find('(')
                # Extract notes from current brackets and avoid empty notes
                for i in mname[apos+1:bpos].split(','):
                    if i:
                        notes.append(i)
                # Check if brackets are at the end of the meal name
                if bpos == len(mname)-1:
                    # Remove brackets and break bracket loop
                    mname = mname[:apos]
                    bpos = -1
                else:
                    # Remove current brackets, improve readability
                    # and find the next brackets
                    mname = mname[:apos].rstrip() + ' und ' + mname[bpos+1:].lstrip()
                    bpos = mname.find(')')

            # Remove trailing whitespaces in the meal name
            mname = mname.rstrip()

            # Add meal type notes to notes list and avoid empty notes
            for i in mtype.split(','):
                if i:
                    notes.append('ZT' + i)

            # Translate notes via legend to human readable information
            mnotes = []
            for i in notes:
                mnotes.append(legend.get(i, legend.get(i[2:], i)))

            # Try to add the meal
            try:
                canteen.addMeal( mdate, category, mname,
                                mnotes, prices, roles)
            except ValueError as e:
                print('could not add meal {}/{} "{}" due to "{}"'.format(mdate, category, mname, e), file=sys.stderr)
                # empty meal ...
                pass

    # return xml data
    return canteen.toXMLFeed()
Exemple #53
def parse_url(url, today=False):
    canteen = LazyBuilder()
    parse_week(url + '&wann=2', canteen)
    if not today:
        parse_week(url + '&wann=3', canteen)
    return canteen.toXMLFeed()
Exemple #54
def parse_url(url, today=False):
    canteen = LazyBuilder()

    # prices are stored on a separate page
    document = parse(urlopen(base + '/mensa-preise/').read(), 'lxml')
    prices = {}
    for tr in document.find('div', 'ce-bodytext').find_all('tr'):
        meal = tr.find('th')
        if not meal or not meal.text.strip():
            continue
        if len(tr.find_all('td', 'betrag')) < 3:
            continue
        if 'titel' in meal.attrs.get('class', []) or 'zeilentitel' in meal.attrs.get('class', []):
            continue
        meal = meal.text.strip()
        prices[meal] = {}
        for role, _id in [('student', 0), ('employee', 1), ('other', 2)]:
            price_html = tr.find_all('td', 'betrag')[_id].text
            price_search = price_regex.search(price_html)
            if price_search:
                prices[meal][role] = price_search.group('price')

    errorCount = 0
    date = datetime.date.today()
    while errorCount < 7:
        try:
            document = parse(urlopen(url.format(date)).read(), 'lxml')
            errorCount = 0
        except HTTPError as e:
            if e.code == 404:
                errorCount += 1
                date += datetime.timedelta(days=1)
                continue
            else:
                raise e

        # extract legend
        legend = {}
        legends = document.find('div', 'tx-stwm-speiseplan')
        additions = legends.find('div', 'c-schedule__filter-body')
        for table in additions.find_all('div', 'c-schedule__filter-item'):
            for ingredient in table.find('ul').find_all('li'):
                name = ingredient.find('dt').text.strip()
                description = ingredient.find('dd').text.strip()
                legend[name] = description
        for label in legends.find('ul', 'c-schedule__type-list').find_all('li'):
            name = label.find('dt').text.replace('(', '').replace(')', '').strip()
            description = label.find('dd').text.strip()
            legend[name] = description

        # extract meals
        mensa_data = document.find('ul', 'c-schedule__list')
        category = None
        for meal in mensa_data.find_all('li'):
            # update category or use previous one if not specified
            category_text = meal.find('dt', 'c-schedule__term').text.strip()
            if category_text:
                category = category_text

            data = meal.find('dd').find('p', 'js-schedule-dish-description')
            name = data.contents[0].strip() # name is the first text node
            if not name:
                continue

            # notes are contained in 3 boxes (type, additional, allergen) and
            # are comma-separated lists enclosed in brackets or parentheses
            notes = []
            for note in meal.find_all('span', 'c-schedule__marker'):
                note_text = note.find('span', 'u-text-sup').text \
                    .replace('(', '').replace(')', '') \
                    .replace('[', '').replace(']', '')
                notes += [n for n in note_text.split(',') if n]

            # some meals contain the GQB label in their name (instead of in notes)
            if '(GQB)' in name:
                name = name.replace('(GQB)', '').strip()
                notes.append('GQB')

            # the price for both meals is specified as Bio-/Aktionsgericht
            price_category = category \
                .replace('Aktionsessen', 'Bio-/Aktionsgericht') \
                .replace('Biogericht', 'Bio-/Aktionsgericht') \
                .strip()

            canteen.addMeal(date, category, name,
                [legend.get(n, n) for n in notes],
                prices.get(price_category, {})
            )

        date += datetime.timedelta(days=1)
        if today:
            break

    return canteen.toXMLFeed()
def parse_url(url, today=False):
    content = urlopen(url).read()
    document = parse(content, "lxml")
    canteen = LazyBuilder()
    table = document.find_all('table')[0]

    def debug_print(food_type, food_description, priceing):
        if(priceing is None):
            print(date + ': ' + food_type + ": " + food_description)
        else:
            print(date + ': ' + food_type + ": " + food_description + " : ", end='')
            for e in priceing:
                print(e, end=' ')
            print()

    def is_new_entry(tds):
        td = tds[0]
        return td.string is not None and date_regex.search(td.string) is not None

    def is_end_of_entry(tds):
        for td in tds:
            if(td.string is None or td.string.strip() != ''):
                return False
        return True

    def is_action_entry(td):
        return td.text == 'Aktion'

    def is_closed(tds):
        return is_new_entry(tds) and get_pricing(tds, 4, 7) is None

    def refactor_date(raw_date):
        now = datetime.datetime.now()
        day = date_regex.search(raw_date).group('day')
        month = date_regex.search(raw_date).group('month')
        year = now.year
        if month == '01' and now.month == 12:
            # if the list spans meals from this and the next year
            year += 1
        elif month == '12' and now.month == 1:
            # if the list spans meals from this and the last year
            year -= 1
        return day+'.'+month+'.'+str(year)

    def parse_foot_type(td):
        type = ''
        if td.string is None:
            if len(td.find_all('img')) == 0:
                return None
            else:
                img = td.find_all('img')[0]
                src = img.get('src')
                if('msc' in src):
                    type += 'Fish MSC '
                elif('vegan' in src):
                    type += 'Vegan '
        # Sometimes uncategorized food is possible, therefore we need to
        # cover this; otherwise openmensa.org will fail due to an empty tag.
        elif(td.string.strip() == ''):
            type += 'Tipp '
        else:
            if('R' in td.string):
                type += 'Rind '
            if('S' in td.string):
                type += 'Schwein '
            if('G' in td.string):
                type += 'Geflügel '
            if('V' in td.string):
                type += 'Vegetarisch '
            if('F' in td.string):
                type += 'Fisch '
            if('L' in td.string):
                type += 'Lamm '
            if('W' in td.string):
                type += 'Wild '
        # drop the trailing space left by the concatenations above
        return type[:-1]

    def get_refs(td):
        return td.find_all('sup')

    def get_foot_description(td):
        refl = get_refs(td)
        description = td.text
        for ref in refl:
            description = description.replace(' '+ref.text, '', 1)
        if description[0] == ' ':
            description = description.replace(' ', '', 1)
        return description

    def get_notes(td):
        refl = get_refs(td)
        strl = []
        for ref in refl:
            strl.extend(ref.string.split(','))
        strl = list(set(strl))
        return strl

    def build_notes_string(td):
        refs = get_notes(td)
        food_is = ''
        food_contains = ''
        for r in refs:
            # parse food is footnotes
            if r == '1':
                food_is += 'mit Farbstoffen, '
            elif r == '4':
                food_is += 'geschwärzt, '
            elif r == '7':
                food_is += 'mit Antioxidationsmittel, '
            elif r == '8':
                food_is += 'mit Geschmacksverstärker, '
            elif r == '9':
                food_is += 'geschwefelt, '
            elif r == '10':
                food_is += 'geschwärzt, '
            elif r == '11':
                food_is += 'gewachst, '
            elif r == '12':
                food_is += 'mit Phosphat, '
            elif r == '5':
                food_is += 'mit Süßungsmittel, '
            # parse allergic footnotes
            elif r == 'a1':
                food_contains += 'Gluten, '
            elif r == 'a2':
                food_contains += 'Krebstiere, '
            elif r == 'a3':
                food_contains += 'Eier, '
            elif r == 'a4':
                food_contains += 'Fisch, '
            elif r == 'a5':
                food_contains += 'Erdnüsse, '
            elif r == 'a6':
                food_contains += 'Soja, '
            elif r == 'a7':
                food_contains += 'Milch/Laktose, '
            elif r == 'a8':
                food_contains += 'Schalenfrüchte, '
            elif r == 'a9':
                food_contains += 'Sellerie, '
            elif r == 'a10':
                food_contains += 'Senf, '
            elif r == 'a11':
                food_contains += 'Sesam, '
            elif r == 'a12':
                food_contains += 'Schwefeldioxid/Sulfite, '
            elif r == 'a13':
                food_contains += 'Lupinen, '
            elif r == 'a14':
                food_contains += 'Weichtiere, '
            else:
                food_contains += 'undefinierte Chemikalien:'+r+', '
        notes = ''
        if food_is != '':
            notes += 'Gericht ist ' + food_is
        if food_contains != '':
            if food_is == '':
                notes += 'Gericht enthält '
            else:
                notes += 'und enthält '
            notes += food_contains
        if notes != '':
            nl = list(notes)
            del nl[len(nl)-1]
            nl[len(nl)-1] = '.'
            notes = ''.join(nl)
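        # e.g. refs '1' and 'a1' yield:
        # 'Gericht ist mit Farbstoffen, und enthält Gluten.'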
        return notes

    def get_pricing(tds, f, t):
        priceing = []
        # sometimes we don't get 7 elements; then this might be a special day
        if len(tds) < 7:
            return None
        for i in range(f, t):
            raw_price = tds[i].string.strip()
            if raw_price == '':
                return None
            else:
                priceing.append(price_regex.search(raw_price).group('val'))
        return priceing

    # state helpers
    inside_valide_entry = False
    date = ''
    # roles matching the three prices returned by get_pricing; used below but
    # not defined elsewhere in this snippet
    roles = ('student', 'employee', 'other')

    for tr in table.find_all('tr'):
        tds = tr.find_all('td')
        if(is_new_entry(tds)):
            try:
                raw_date = tds[0].string
                date = refactor_date(raw_date)
                if(is_closed(tds)):
                    # sometimes a canteen might look closed but actually it's
                    # Spargeltage (asparagus days)
                    if "Spargeltage" in tds[3].text:
                        canteen.addMeal(date, "Spargel", "Spargel Tag", ["Spargel und andere Gerichte."], None, None)
                    else:
                        canteen.setDayClosed(date)
                else:
                    inside_valide_entry = True
            except Exception as e:
                traceback.print_exception(*sys.exc_info())
        if(is_end_of_entry(tds)):
            inside_valide_entry = False
        elif inside_valide_entry:
            try:
                notes = []
                if is_action_entry(tds[0]):
                    food_type = parse_foot_type(tds[1])
                    food_description = get_foot_description(tds[2])
                    notes_string = build_notes_string(tds[2])
                    if(notes_string != ""):
                        notes.append(notes_string)
                    prices = get_pricing(tds, 3, 6)
                    canteen.addMeal(date, 'Aktion: ' + food_type, food_description, notes, prices, roles if prices else None)
                else:
                    food_type = parse_foot_type(tds[2])
                    food_description = get_foot_description(tds[3])
                    notes_string = build_notes_string(tds[3])
                    if(notes_string != ""):
                        notes.append(notes_string)
                    prices = get_pricing(tds, 4, 7)
                    if food_type is not None:
                        canteen.addMeal(date, food_type, food_description, notes, prices, roles if prices else None)
            except Exception as e:
                traceback.print_exception(*sys.exc_info())

    return canteen.toXMLFeed()
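
# date_regex and price_regex are referenced above but defined outside this
# snippet; plausible definitions (assumptions) consistent with their usage:
import re
date_regex = re.compile(r'(?P<day>\d{2})\.(?P<month>\d{2})\.')
price_regex = re.compile(r'(?P<val>\d+,\d{2})')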
Exemple #56
def parse_url(url, today=False):
    canteen = LazyBuilder()
    legend = {'f': 'fleischloses Gericht', 'v': 'veganes Gericht'}
    document = parse(urlopen(base + '/speiseplan/zusatzstoffe-de.html').read())
    for td in document.find_all('td', 'beschreibung'):
        legend[td.parent.find('td', 'gericht').text] = td.text
    document = parse(urlopen(base + '/mensa-preise/').read())
    prices = {}
    for tr in document.find('div', 'ce-bodytext').find_all('tr'):
        meal = tr.find('th')
        if not meal or not meal.text.strip():
            continue
        if len(tr.find_all('td', 'betrag')) < 3:
            continue
        if 'titel' in meal.attrs.get(
                'class', []) or 'zeilentitel' in meal.attrs.get('class', []):
            continue
        meal = meal.text.strip()
        prices[meal] = {}
        for role, _id in [('student', 0), ('employee', 1), ('other', 2)]:
            price_html = tr.find_all('td', 'betrag')[_id].text
            price_search = price_regex.search(price_html)
            if price_search:
                prices[meal][role] = price_search.group('price')
    errorCount = 0
    date = datetime.date.today()
    while errorCount < 7:
        try:
            document = parse(urlopen(url.format(date)).read())
        except HTTPError as e:
            if e.code == 404:
                errorCount += 1
                date += datetime.date.resolution
                continue
            else:
                raise e
        else:
            errorCount = 0
        for tr in document.find('table', 'zusatzstoffe').find_all('tr'):
            identifier = tr.find_all('td')[0].text \
                           .replace('(', '').replace(')', '')
            legend[identifier] = tr.find_all('td')[1].text.strip()
        canteen.setLegendData(legend)
        mensa_data = document.find('table', 'menu')
        category = None
        for menu_tr in mensa_data.find_all('tr'):
            if menu_tr.find('td', 'headline'):
                continue
            if menu_tr.find('td', 'gericht').text:
                category = menu_tr.find('td', 'gericht').text
            data = menu_tr.find('td', 'beschreibung')
            name = data.find('span').text.strip()
            if not name:
                continue
            notes = [
                span['title'] for span in data.find_all('span', title=True)
            ]
            canteen.addMeal(
                date, category, name, notes,
                prices.get(
                    category.replace('Aktionsessen', 'Bio-/Aktionsgericht'),
                    {}))
        date += datetime.date.resolution
        if today:
            break
    return canteen.toXMLFeed()
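
# Note: datetime.date.resolution equals timedelta(days=1), so the
# "date += datetime.date.resolution" steps above advance exactly one day,
# matching the timedelta(days=1) used in the earlier variant of this parser.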