def parse_url(url, today=False, canteentype="Mittagsmensa", this_week="", next_week=True, legend_url=None):
    canteen = LazyBuilder()
    canteen.legendKeyFunc = lambda v: v.lower()
    if not legend_url:
        legend_url = url[: url.find("essen/") + 6] + "wissenswertes/lebensmittelkennzeichnung"
    legend_doc = parse(urlopen(legend_url)).find(id="artikel")
    allergene = buildLegend(
        text=legend_doc.text.replace("\xa0", " "), regex=r"(?P<name>[A-Z]+) {3,}enthält (?P<value>\w+( |\t|\w)*)"
    )
    allergene["EI"] = "Ei"
    zusatzstoffe = buildLegend(
        text=legend_doc.text.replace("\xa0", " "), regex=r"(?P<name>\d+) {3,} (enthält )?(?P<value>\w+( |\t|\w)*)"
    )
    for tr in legend_doc.find_all("tr"):
        tds = tr.find_all("td")
        if len(tds) != 2:
            continue
        title = tds[0].find("strong")
        if title is None:
            continue
        else:
            title = title.text
        text = tds[1].text.replace("enthält", "").strip()
        if title.isdigit():
            zusatzstoffe[title] = text
        else:
            allergene[title] = text
    parse_week(url + this_week, canteen, canteentype, allergene=allergene, zusatzstoffe=zusatzstoffe)
    if not today and next_week is True:
        parse_week(url + "-kommende-woche", canteen, canteentype, allergene=allergene, zusatzstoffe=zusatzstoffe)
    if not today and type(next_week) is str:
        parse_week(url + next_week, canteen, canteentype, allergene=allergene, zusatzstoffe=zusatzstoffe)
    print(canteen.toXMLFeed())
    return canteen.toXMLFeed()
Example #2
0
def parse_url(url,
              today=False,
              canteentype='Mittagsmensa',
              this_week='',
              next_week=True,
              legend_url=None):
    canteen = LazyBuilder()
    canteen.legendKeyFunc = lambda v: v.lower()
    if not legend_url:
        legend_url = url[:url.find('essen/') +
                         6] + 'wissenswertes/lebensmittelkennzeichnung'
    legend_doc = parse(urlopen(legend_url), 'lxml').find(id='artikel')
    allergene = buildLegend(
        text=legend_doc.text.replace('\xa0', ' '),
        regex=r'(?P<name>[A-Z]+) {3,}enthält (?P<value>\w+( |\t|\w)*)')
    allergene['EI'] = 'Ei'
    zusatzstoffe = buildLegend(
        text=legend_doc.text.replace('\xa0', ' '),
        regex=r'(?P<name>\d+) {3,} (enthält )?(?P<value>\w+( |\t|\w)*)')
    suballergene = re.compile(
        r'(?P<name>[0-9A-Z]+)[^a-zA-Z]*enthält (?P<value>\w+( |\t|\w)*)')
    for tr in legend_doc.find_all('tr'):
        tds = tr.find_all('td')
        if len(tds) != 2:
            continue
        title = tds[0].find('strong')
        if title is None:
            continue
        else:
            title = title.text
        lines = tds[1].text.split('\n')
        for line in lines[1:]:
            try_allergine = suballergene.match(line)
            if try_allergine:
                allergene[try_allergine.group('name')] = try_allergine.group(
                    'value')
        text = lines[0].replace('enthält', '').strip()
        if title.isdigit():
            zusatzstoffe[title] = text
        else:
            allergene[title] = text
    parse_week(url + this_week,
               canteen,
               canteentype,
               allergene=allergene,
               zusatzstoffe=zusatzstoffe)
    if not today and next_week is True:
        parse_week(url + '-kommende-woche',
                   canteen,
                   canteentype,
                   allergene=allergene,
                   zusatzstoffe=zusatzstoffe)
    if not today and type(next_week) is str:
        parse_week(url + next_week,
                   canteen,
                   canteentype,
                   allergene=allergene,
                   zusatzstoffe=zusatzstoffe)
    return canteen.toXMLFeed()
Example #3
0
def parse_url(url, today=False):
    canteen = OpenMensaCanteen()
    # todo only for: Tellergericht, vegetarisch, Klassiker, Empfehlung des Tages:
    canteen.setAdditionalCharges("student", {"other": 1.5})

    document = parse(urlopen(url).read())

    global legend
    regex = "\((?P<name>[\dA-Z]+)\)\s*(?P<value>[\w\s]+)"
    legend = buildLegend(legend, document.find(id="additives").text, regex=regex)

    days = (
        "montag",
        "dienstag",
        "mittwoch",
        "donnerstag",
        "freitag",
        "montagNaechste",
        "dienstagNaechste",
        "mittwochNaechste",
        "donnerstagNaechste",
        "freitagNaechste",
    )
    for day in days:
        data = document.find("div", id=day)
        headline = document.find("a", attrs={"data-anchor": "#" + day})
        parse_day(canteen, headline.text, data)
    return canteen.toXMLFeed()
def parse_url(url, today=False, canteentype='Mittagsmensa', this_week='', next_week=True, legend_url=None):
    canteen = LazyBuilder()
    canteen.legendKeyFunc = lambda v: v.lower()
    if not legend_url:
        legend_url = url[:url.find('essen/') + 6] + 'wissenswertes/lebensmittelkennzeichnung'
    legend_doc = parse(urlopen(legend_url), 'lxml').find(id='artikel')
    allergene = buildLegend(
        text=legend_doc.text.replace('\xa0', ' '),
        regex=r'(?P<name>[A-Z]+) {3,}enthält (?P<value>\w+( |\t|\w)*)'
    )
    allergene['EI'] = 'Ei'
    zusatzstoffe = buildLegend(
        text=legend_doc.text.replace('\xa0', ' '),
        regex=r'(?P<name>\d+) {3,} (enthält )?(?P<value>\w+( |\t|\w)*)'
    )
    suballergene = re.compile(r'(?P<name>[0-9A-Z]+)[^a-zA-Z]*enthält (?P<value>\w+( |\t|\w)*)')
    for tr in legend_doc.find_all('tr'):
        tds = tr.find_all('td')
        if len(tds) != 2:
            continue
        title = tds[0].find('strong')
        if title is None:
            continue
        else:
            title = title.text
        lines = tds[1].text.split('\n')
        for line in lines[1:]:
            try_allergine = suballergene.match(line)
            if try_allergine:
                allergene[try_allergine.group('name')] = try_allergine.group('value')
        text = lines[0].replace('enthält', '').strip()
        if title.isdigit():
            zusatzstoffe[title] = text
        else:
            allergene[title] = text
    parse_week(url + this_week, canteen, canteentype,
               allergene=allergene, zusatzstoffe=zusatzstoffe)
    if not today and next_week is True:
        parse_week(url + '-kommende-woche', canteen, canteentype,
                   allergene=allergene, zusatzstoffe=zusatzstoffe)
    if not today and type(next_week) is str:
        parse_week(url + next_week, canteen, canteentype,
                   allergene=allergene, zusatzstoffe=zusatzstoffe)
    return canteen.toXMLFeed()
Example #5
0
def parsePlan(url, internalMensaId, today):
    canteen = LazyBuilder()
    end = False
    while (url != None):
        dom = BeautifulSoup(urlopen(url).read(), 'lxml')
        date = dom.select('#mensa_date > p')[0].contents[0]
        menuDefinition = dom.find(id=internalMensaId)
        menuDescription = menuDefinition.parent.find('dd')
        tables = menuDescription.select('table')
        legend = {}
        legend = buildLegend(legend, str(dom), regex='<strong>(?P<name>\w+)\s*</strong>\s*-\s*(?P<value>[\w\s)(]+)')
        if tables != None and len(tables) == 1:
            table = tables[0]
            rows = table.find_all('tr')
            for row in rows:
                menuNameElement = row.select('td[class="mensa_col_55"] > b')
                if menuNameElement != None and menuNameElement[0].contents != None:
                    menuName = menuNameElement[0].contents[0]
                    category = 'Gericht'

                    # get notes
                    notes = {}
                    notesElement = row.select('td[class="mensa_col_55"] > span')
                    if notesElement != None and len(notesElement) > 0 and notesElement[0].text != None:
                        notes = [legend.get(n, n) for n in notesElement[0].text.split(' ') if n]

                    # get prices
                    prices = {}
                    for td in row.select('td[class="mensa_col_15"]'):
                        priceElement = td.find('b')
                        groupElement = td.find('span')
                        if priceElement != None and groupElement != None and groupElement.contents != None and len(groupElement.contents) > 0 and priceElement.contents != None and len(priceElement.contents) > 0:
                            group = str(groupElement.contents[0])
                            price = str(priceElement.contents[0])
                            if group == 'Stud.:':
                                prices['student'] = price
                            elif group == 'Bed.:':
                                prices['employee'] = price
                            elif group == 'Gast:':
                                prices['other'] = price

                    canteen.addMeal(date, category, menuName, notes, prices)
        else:
            canteen.setDayClosed(date)

        # check for further pages
        nextPageLink = dom.find(id='next_day_link')
        if nextPageLink == None or today:
            url = None
        else:
            url = 'https://www.studentenwerk-rostock.de/' + nextPageLink['href']
    return canteen.toXMLFeed()
Example #6
0
def parse_url(url, today=False):
    canteen = OpenMensaCanteen()
    # todo only for: Tellergericht, vegetarisch, Klassiker, Empfehlung des Tages:
    canteen.setAdditionalCharges('student', {'other': 1.5})

    document = parse(urlopen(url).read())

    global legend
    regex = '\((?P<name>[\dA-Z]+)\)\s*(?P<value>[\w\s]+)'
    legend = buildLegend(legend, document.find(id='additives').text, regex=regex)

    days = ('montag', 'dienstag', 'mittwoch', 'donnerstag', 'freitag',
            'montagNaechste', 'dienstagNaechste', 'mittwochNaechste', 'donnerstagNaechste', 'freitagNaechste')
    for day in days:
        data = document.find('div', id=day)
        headline = document.find('a', attrs={'data-anchor': '#' + day})
        parse_day(canteen, headline.text, data)
    return canteen.toXMLFeed()
Example #7
0
def parse_url(url, today=False):
    canteen = OpenMensaCanteen()
    # todo only for: Tellergericht, vegetarisch, Klassiker, Empfehlung des Tages:
    canteen.setAdditionalCharges('student', {'other': 1.5})

    document = parse(urlopen(url).read())

    global legend
    regex = '(?P<name>(\d|[A-Z])+)\)\s*' + \
            '(?P<value>\w+((\s+\w+)*[^0-9)]))'
    legend = buildLegend(legend, document.find(id='additives').text, regex=regex)

    days = ('montag', 'dienstag', 'mittwoch', 'donnerstag', 'freitag',
            'montagNaechste', 'dienstagNaechste', 'mittwochNaechste', 'donnerstagNaechste', 'freitagNaechste')
    for day in days:
        data = document.find('div', id=day)
        headline = document.find('a', attrs={'data-anchor': '#' + day})
        parse_day(canteen, headline.text, data)
    return canteen.toXMLFeed()
Example #8
0
 def test_note_extraction(self):
     text = '1) Schwein a)Farbstoff'
     legend = {'1': 'Schwein', 'a': 'Farbstoff'}
     assert buildLegend({}, text=text) == legend
Example #9
0
 def test_dict_passthrought(self):
     d = {}
     assert buildLegend(d) is d
Example #10
0
 def test_dict_passthrought(self):
     d = {}
     assert buildLegend(d) is d
Example #11
0
 def test_note_extraction(self):
     text = '1) Schwein a)Farbstoff'
     legend = {'1': 'Schwein', 'a': 'Farbstoff'}
     assert buildLegend({}, text=text) == legend
Example #12
0
def parse_legend(document):
    regex = '\((?P<name>[\dA-Z]+)\)\s*(?P<value>[\w\s]+)'
    return buildLegend(text=document.find(id='additives').text, regex=regex)
Example #13
0
def parsePlan(url, internalMensaId, today):
    canteen = LazyBuilder()
    end = False
    while (url != None):
        dom = BeautifulSoup(urlopen(url).read(), 'lxml')
        date = dom.select('#mensa_date > p')[0].contents[0]
        menuDefinition = dom.find(id=internalMensaId)
        menuDescription = menuDefinition.parent.find('dd')
        tables = menuDescription.select('table')
        legend = {}
        legend = buildLegend(
            legend,
            str(dom),
            regex='<strong>(?P<name>\w+)\s*</strong>\s*-\s*(?P<value>[\w\s)(]+)'
        )
        if tables != None and len(tables) == 1:
            table = tables[0]
            rows = table.find_all('tr')
            for row in rows:
                menuNameElement = row.select('td[class="mensa_col_55"] > b')
                if menuNameElement != None and menuNameElement[
                        0].contents != None:
                    menuName = menuNameElement[0].contents[0]
                    category = 'Gericht'

                    # get notes
                    notes = {}
                    notesElement = row.select(
                        'td[class="mensa_col_55"] > span')
                    if notesElement != None and len(
                            notesElement) > 0 and notesElement[0].text != None:
                        notes = [
                            legend.get(n, n)
                            for n in notesElement[0].text.split(' ') if n
                        ]

                    # get prices
                    prices = {}
                    for td in row.select('td[class="mensa_col_15"]'):
                        priceElement = td.find('b')
                        groupElement = td.find('span')
                        if priceElement != None and groupElement != None and groupElement.contents != None and len(
                                groupElement.contents
                        ) > 0 and priceElement.contents != None and len(
                                priceElement.contents) > 0:
                            group = str(groupElement.contents[0])
                            price = str(priceElement.contents[0])
                            if group == 'Stud.:':
                                prices['student'] = price
                            elif group == 'Bed.:':
                                prices['employee'] = price
                            elif group == 'Gast:':
                                prices['other'] = price

                    canteen.addMeal(date, category, menuName, notes, prices)
        else:
            canteen.setDayClosed(date)

        # check for further pages
        nextPageLink = dom.find(id='next_day_link')
        if nextPageLink == None or today:
            url = None
        else:
            url = 'https://www.studentenwerk-rostock.de/' + nextPageLink['href']
    return canteen.toXMLFeed()
Example #14
0
def parse_legend(document):
    regex = '\((?P<name>[\dA-Z]+)\)\s*(?P<value>[\w\s]+)'
    return buildLegend(text=document.find(id='additives').text, regex=regex)