예제 #1
0
def markMessageAsRead(cname, mid, isRead=True):
    '''Set the read state of a single message on the server.

    cname is the child whose URL prefix is used, mid is the message id
    (must be a str or unicode), and isRead selects the new state
    (True = read, False = unread).  The state is posted to the
    UpdateMessagesReadState endpoint; the response is discarded and
    nothing is returned.
    '''
    # isinstance also accepts subclasses of str/unicode, which an exact
    # type() comparison would reject
    assert isinstance(mid, (str, unicode))
    state = 'true' if isRead else 'false'
    url = schildren.getChildURL(cname, '/messages/UpdateMessagesReadState')
    data = {'selectionState[MessageIds][]': mid, 'isRead': state}
    # Log the state that is actually requested (previously the log always
    # claimed "read", even when the message was being marked as unread)
    if isRead:
        config.clog(cname, u'Markerer besked #%s som læst' % mid, 3)
    else:
        config.clog(cname, u'Markerer besked #%s som ulæst' % mid, 3)
    surllib.skoleGetURL(url, noCache=True, postData=data)
예제 #2
0
def markMessageAsRead(cname, mid, isRead=True):
    '''Post a new read/unread state for one message.

    cname selects the child URL, mid is the message id (str or unicode)
    and isRead is the wanted state (defaults to True, i.e. read).
    Nothing is returned; the server reply is ignored.
    '''
    # Accept str/unicode and their subclasses
    assert isinstance(mid, (str, unicode))
    flag = 'true' if isRead else 'false'
    url = schildren.getChildURL(cname, '/messages/UpdateMessagesReadState')
    data = {'selectionState[MessageIds][]': mid, 'isRead': flag}
    # The log text must match the requested state; it used to say "read"
    # unconditionally
    if isRead:
        config.clog(cname, u'Markerer besked #%s som læst' % mid, 3)
    else:
        config.clog(cname, u'Markerer besked #%s som ulæst' % mid, 3)
    surllib.skoleGetURL(url, noCache=True, postData=data)
예제 #3
0
def listsCheckList(postData, listtype):
    '''Fetch one contact list and pass it to semail as a message.

    postData is sent along with the request for URL_MAIN.  listtype is
    the list identifier; for 'V6' all links are stripped from the table.
    The fetched page and the selected table are stored in the module
    globals bs and tbl.
    '''
    global bs, tbl
    # First request; the original comment calls this the "potential cached
    # version"
    bs = surllib.skoleGetURL(URL_MAIN, True, False, True, postData)
    if bs.cacheage > 6.9:
        # Cache age above the limit: request again with the third argument
        # set (presumably forcing a fresh download -- confirm in surllib)
        bs = surllib.skoleGetURL(URL_MAIN, True, True, True, postData)

    # The list itself is the third table on the page
    tbl = bs.findAll('table')[2]
    if listtype == 'V6':
        # Drop the links (to pictures of parents) but keep their contents
        for link in tbl.findAll('a'):
            link.replaceWithChildren()

    # Title: heading in the first row (the row is then removed), otherwise
    # any heading in the table, otherwise a fixed default
    firstRow = tbl.find('tr')
    rowHeading = firstRow.find('h2')
    if rowHeading:
        title = rowHeading.text
        firstRow.extract()
    else:
        tableHeading = tbl.find('h2')
        if tableHeading:
            title = tableHeading.text
        else:
            title = u'Kontaktoplysninger'

    msg = semail.Message('contactList', tbl)
    msg.setTitle(title)
    semail.maybeEmail(msg)
예제 #4
0
def getMsgsForChild(cname):
    '''Collect new messages for one child.

    The message view type is read from the browser state key 'dialogue'.
    'conversations' is parsed with parseMessages; 'inbox' walks the inbox
    and outbox trays with parseTrayMessages.  Any other value is logged
    and gives an empty list.
    '''
    def fetch(suffix):
        # Log and download one message page, bypassing the cache
        url = schildren.getChildURL(cname, suffix)
        config.clog(cname, u'Kigger efter nye beskeder på %s' % url)
        return surllib.skoleGetURL(url, asSoup=True, noCache=True)

    view = surllib.getBrowser().getState('dialogue')

    if view == 'conversations':
        # Newer, conversation based message view
        return parseMessages(cname, fetch('/messages/conversations'))

    if view == 'inbox':
        # Older view with separate trays
        found = []
        for tray in ('inbox', 'outbox'):
            found.extend(parseTrayMessages(cname, fetch('/messages/' + tray)))
        return found

    config.clog(cname, u'Beskede-indbakke-type %r ikke understøttet'
                % view, 0)
    return []
예제 #5
0
def getMsgsForChild(cname):
    '''Return the new messages found for the child cname.

    The browser state 'dialogue' selects the parser: 'conversations' uses
    parseMessages on the conversations page, 'inbox' uses
    parseTrayMessages on the inbox and outbox pages.  Unsupported values
    are logged and an empty list is returned.
    '''
    kind = surllib.getBrowser().getState('dialogue')

    if kind not in ('conversations', 'inbox'):
        config.clog(cname,
                    u'Beskede-indbakke-type %r ikke understøttet' % kind, 0)
        return []

    if kind == 'conversations':
        # Conversation ("gmail"-like) view: a single page holds everything
        suffixes = ['/messages/conversations']
        parse = parseMessages
    else:
        # Tray view: one page per tray
        suffixes = ['/messages/' + tray for tray in ('inbox', 'outbox')]
        parse = parseTrayMessages

    collected = []
    for suffix in suffixes:
        url = schildren.getChildURL(cname, suffix)
        config.clog(cname, u'Kigger efter nye beskeder på %s' % url)
        soup = surllib.skoleGetURL(url, asSoup=True, noCache=True)
        parsed = parse(cname, soup)
        if kind == 'conversations':
            # The conversation result is returned untouched
            return parsed
        collected += parsed

    return collected
예제 #6
0
def skoleSelectChild(name):
    '''Select name as the current child.

    name must be a key of the module-level _children mapping.  When it is
    not already config.CHILDNAME, the child's URL is requested (uncached)
    and config.CHILDNAME is updated.
    '''
    assert name in _children

    if name == config.CHILDNAME:
        # Nothing to do
        config.log(u"[%s] er allerede valgt som barn" % name)
        return

    config.log(u"Vælger [%s]" % name)
    surllib.skoleGetURL(URL_PREFIX + _children[name], False, noCache=True)
    config.CHILDNAME = name
예제 #7
0
def skoleSelectChild(name):
    '''Switch the current child to name.

    name must be present in the module-level _children mapping.  If it
    differs from config.CHILDNAME the child's page (urlPrefix() plus the
    stored href) is requested without cache and config.CHILDNAME is set.
    '''
    assert name in _children

    if name == config.CHILDNAME:
        # Already selected
        config.log(u'[%s] er allerede valgt som barn' % name)
        return

    config.log(u'Vælger [%s]' % name)
    surllib.skoleGetURL(urlPrefix() + _children[name], False, noCache=True)
    config.CHILDNAME = name
예제 #8
0
def getMsgsForChild(cname):
    '''Fetch the front page of cname and return what parseFrontpage finds.

    The page is requested with noCache=True; the result of parseFrontpage
    is returned unchanged.
    '''
    frontpage = schildren.getChildURL(cname, '/Index')
    config.clog(cname, u'Behandler forsiden %s' % frontpage)
    soup = surllib.skoleGetURL(frontpage, asSoup=True, noCache=True)
    return parseFrontpage(cname, soup)
예제 #9
0
def wpFindWeekplans(bs):
    '''Send a message for every week plan linked from the page bs.

    Only table rows with a class starting with 'linje' are considered.
    The first link of such a row gives the title and the URL of the plan,
    which is fetched, trimmed with wpTrimPlan and passed to semail.
    '''
    for row in bs.findAll('tr'):
        if not row.has_key('class'):
            continue
        if not any(c.startswith('linje') for c in row['class'].split()):
            continue

        anchors = row.findAll('a')
        assert len(anchors) >= 1
        first = anchors[0]

        # The link text is used as title (the week date)
        title = first.text

        # Encode and quote the href before prefixing it
        href = first['href'].encode('iso-8859-1')
        url = URL_PREFIX + urllib.quote(href, safe=':/?=&%')

        plan = wpTrimPlan(surllib.skoleGetURL(url, True, True))

        msg = semail.Message('weekplans', plan)
        msg.setTitle(u'%s' % title)
        msg.updatePersonDate()
        msg.maybeSend()
예제 #10
0
def skoleGetChildren():
    '''Return a sorted list with the names of the "available" children.

    The cached list and the login are reset first, so the list is read
    from the page at url() again.  Names in NAMES_IGNORE are skipped.
    An empty list is returned when the page request gives nothing.
    '''
    global _children

    # Start from scratch: forget the children, then log in again
    _children = None
    surllib.resetSkoleLogin()
    surllib.skoleLogin()

    config.log(u'Henter liste af børn')

    if not _children:
        soup = surllib.skoleGetURL(url(), asSoup=True, noCache=True)
        if not soup:
            return []

        _children = {}
        for link in soup.findAll('a'):
            target = link['href']
            label = link.span.text

            if label in NAMES_IGNORE:
                config.log(u'Ignorerer [%s]' % label)
            else:
                _children[label] = target

    return sorted(_children)
예제 #11
0
def getMsgsForChild(cname):
    '''Return the front page news of cname as parsed by parseFrontpage.

    The '/Index' page of the child is downloaded without using the cache.
    '''
    url = schildren.getChildURL(cname, '/Index')
    config.clog(cname, u'Behandler forsiden %s' % url)
    return parseFrontpage(
        cname, surllib.skoleGetURL(url, asSoup=True, noCache=True))
예제 #12
0
def parseTrayMessages(cname, bs):
    '''Return the unseen messages of one message tray (old message view).

    Each '.sk-message-list-item' in bs is inspected.  Items that
    semail.hasSentMessage already knows are skipped; the rest are fetched
    and parsed with parseTrayMessage.
    '''
    fresh = []

    for item in bs.select('.sk-message-list-item'):
        url = item.find('a')['href']

        # The message id is the number following '/message/' in the link
        ids = re.findall('(?<=/message/)[0-9]+', url)
        assert len(ids) == 1 and ids[0]
        mid = ids[0]

        sender = item.find('li', 'sk-message-senderrecipient-name').text.strip()
        # Strip a trailing parenthesised part, "Name (...)" -> "Name"
        paren = re.match(r'^([^(]*) \(.*\)$', sender)
        if paren:
            sender = paren.group(1)

        # The title is only used for the log line below
        title = item.find('div', 'sk-message-title').text.strip()

        if semail.hasSentMessage(tp=SECTION, mid=mid):
            continue

        config.clog(cname, u'Henter ny besked: %s - %s' % (sender, title), 2)
        page = surllib.skoleGetURL(url, True)
        fresh.append(parseTrayMessage(cname, page, mid, sender))

    return fresh
예제 #13
0
def wpFindWeekplans(bs):
    '''Look for week plans on the page bs and pass each one to semail.

    A row qualifies when one of its classes starts with 'linje'.  The
    first link in the row supplies both the title and the plan URL.
    '''
    for row in bs.findAll('tr'):
        if not row.has_key('class'):
            continue
        if not any(c.startswith('linje') for c in row['class'].split()):
            continue

        anchors = row.findAll('a')
        assert len(anchors) >= 1
        link = anchors[0]

        # Title (the week date) comes from the link text
        title = link.text

        # Build the absolute, quoted URL of the plan
        href = link['href'].encode('iso-8859-1')
        url = urlPrefix() + urllib.quote(href, safe=':/?=&%')

        plan = wpTrimPlan(surllib.skoleGetURL(url, True, True))

        msg = semail.Message('weekplans', plan)
        msg.setTitle(u'%s' % title)
        msg.updatePersonDate()
        msg.maybeSend()
예제 #14
0
def skolePhotos(cname):
    '''Photos ("Billeder"): scan the photo archives page of cname.

    The archives page is fetched with MAX_CACHE_AGE and passed on to
    findPhotos.
    '''
    archives = schildren.getChildURL(cname, '/photos/archives')
    soup = surllib.skoleGetURL(archives, True, MAX_CACHE_AGE)

    config.clog(cname, u'Kigger efter billeder')
    findPhotos(cname, soup)
예제 #15
0
def skolePhotos(cname):
    '''Photos ("Billeder"): look for pictures belonging to cname.

    Downloads the '/photos/archives' page of the child (MAX_CACHE_AGE is
    passed to the fetch) and lets findPhotos process it.
    '''
    page = surllib.skoleGetURL(
        schildren.getChildURL(cname, '/photos/archives'),
        True, MAX_CACHE_AGE)

    config.clog(cname, u'Kigger efter billeder')
    findPhotos(cname, page)
예제 #16
0
def parseTrayMessages(cname, bs):
    '''Scan a message tray page (old message view) for unseen messages.

    Returns a list with the result of parseTrayMessage for every list
    item whose message id semail.hasSentMessage does not know yet.
    '''
    result = []

    for entry in bs.select('.sk-message-list-item'):
        url = entry.find('a')['href']

        # Exactly one numeric id is expected after '/message/'
        found = re.findall('(?<=/message/)[0-9]+', url)
        assert len(found) == 1 and found[0]
        mid = found[0]

        sender = entry.find(
            'li', 'sk-message-senderrecipient-name').text.strip()
        # Keep only the part before a trailing " (...)"
        match = re.match(r'^([^(]*) \(.*\)$', sender)
        if match:
            sender = match.group(1)

        # Title is read for logging purposes
        title = entry.find('div', 'sk-message-title').text.strip()

        if semail.hasSentMessage(tp=SECTION, mid=mid):
            continue

        config.clog(cname, u'Henter ny besked: %s - %s' % (sender, title), 2)
        page = surllib.skoleGetURL(url, True)
        result.append(parseTrayMessage(cname, page, mid, sender))

    return result
예제 #17
0
def skoleSignup(cname):
    '''Sign-ups ("Tilmelding til samtaler/arrangementer") for cname.

    Fetches the '/signup/conversation' and '/signup/event' pages of the
    child and passes each to findEvents.
    '''
    config.clog(cname, u'Kigger efter nye samtaler/arrangementer')
    for kind in ('conversation', 'event'):
        page = surllib.skoleGetURL(
            schildren.getChildURL(cname, '/signup/' + kind),
            True, MAX_CACHE_AGE)
        findEvents(cname, page)
예제 #18
0
def skoleContacts(cname):
    '''Contact information ("Kontaktinformation") for cname.

    The student cards page lists one option per entry in the
    '#sk-toolbar-contact-dropdown' select.  Every option value is fetched
    and passed to contactCard.  Nothing happens (apart from a log line)
    when no options are found.
    '''
    config.clog(cname, u'Kigger efter ny kontaktinformation')
    overview = surllib.skoleGetURL(
        schildren.getChildURL(cname, '/contacts/students/cards'),
        True, MAX_CACHE_AGE)

    options = overview.select('#sk-toolbar-contact-dropdown option')
    if not options:
        config.clog(cname, u'Kan ikke finde nogen elever?')
        return

    for option in options:
        # Allowed cache age is tied to the age of the overview page
        card = surllib.skoleGetURL(
            option['value'], True, overview.cacheage + .01)
        contactCard(cname, card)
예제 #19
0
def skoleContacts(cname):
    '''Contact information ("Kontaktinformation"): process contact cards.

    Reads the options of '#sk-toolbar-contact-dropdown' on the student
    cards page of cname and calls contactCard on the page behind each
    option value.  Logs and returns early if there are no options.
    '''
    config.clog(cname, u'Kigger efter ny kontaktinformation')
    cardsUrl = schildren.getChildURL(cname, '/contacts/students/cards')
    listing = surllib.skoleGetURL(cardsUrl, True, MAX_CACHE_AGE)

    entries = listing.select('#sk-toolbar-contact-dropdown option')
    if not entries:
        config.clog(cname, u'Kan ikke finde nogen elever?')
        return

    for entry in entries:
        # Slightly more than the age of the listing page is passed as the
        # third argument
        page = surllib.skoleGetURL(
            entry['value'], True, listing.cacheage + .01)
        contactCard(cname, page)
예제 #20
0
def skoleWeekplans():
    '''Log in, fetch the main week plan page and process it.

    The fetched page is stored in the module global bs and handed to
    wpFindWeekplans.
    '''
    global bs

    surllib.skoleLogin()
    config.log(u'Kigger efter nye ugeplaner')

    # Overview page; kept in the module-level bs
    bs = surllib.skoleGetURL(urlMain(), True, True)
    wpFindWeekplans(bs)
예제 #21
0
def skoleDocuments():
    '''Fetch the main documents page and look for documents in it.

    The page is stored in the module global bs and passed to
    docFindDocuments.  No explicit login is performed here.
    '''
    global bs

    config.log(u"Kigger efter nye dokumenter")

    # Overview page; kept in the module-level bs
    bs = surllib.skoleGetURL(URL_MAIN, True, True)
    docFindDocuments(bs)
예제 #22
0
def skoleWeekplans():
    '''Fetch the main week plan page and process the plans on it.

    The page is stored in the module global bs and passed to
    wpFindWeekplans.  No explicit login is performed here.
    '''
    global bs

    config.log(u'Kigger efter nye ugeplaner')

    # Overview page; kept in the module-level bs
    bs = surllib.skoleGetURL(URL_MAIN, True, True)
    wpFindWeekplans(bs)
예제 #23
0
def skoleDocuments():
    '''Download the documents overview and scan it for documents.

    Stores the downloaded page in the module global bs before calling
    docFindDocuments on it.  No explicit login is performed here.
    '''
    global bs

    config.log(u'Kigger efter nye dokumenter')

    # The initial page is kept in the module-level bs
    bs = surllib.skoleGetURL(URL_MAIN, True, True)
    docFindDocuments(bs)
예제 #24
0
def checkForUpdates():
    '''Decide whether a full update run is needed.

    Returns a tuple (full, state).  full is True when a full update
    should run.  state is (now, updateURL), where now is the time this
    function started and updateURL is the href of the first notification
    link, or None when no such link was found.
    '''
    now = datetime.datetime.now()

    soup = surllib.skoleGetURL(
        '/notifications/v1?useNewerThan=False&pageSize=10', True, True)

    # The first notification link identifies the latest update
    anchors = soup.select('.sk-notifications-list li a')
    if anchors and anchors[0].has_attr('href'):
        updateURL = anchors[0]['href']
        config.log(u'Sidste opdatering var til %s' % updateURL, 2)
    else:
        updateURL = None
        config.log(u'Kunne ikke finde sidst opdaterede side', 2)

    state = (now, updateURL)
    browser = surllib.getBrowser()

    def runFull(reason):
        # Log why a full update is run and build the return value
        config.log(reason, 1)
        return (True, state)

    # 1) The latest update differs from the one stored in the browser state
    if browser.getState('lastUpdateURL') != updateURL:
        return runFull(u'Kører fuld opdatering: Forventer nyt opslag/besked')

    # 2) The stored time stamp is missing, unparsable or in the future
    try:
        stamp = browser.getState('lastUpdateTime')
        if stamp:
            lastRun = datetime.datetime.strptime(stamp, DT_FORMAT)
        else:
            lastRun = None
    except ValueError:
        lastRun = None

    if not lastRun or now < lastRun:
        return runFull(u'Kører fuld opdatering: Mangler tidsstempel fra '
                       u'sidste kørsel')

    # 3) First run after the most recent 05:00 (yesterday's 05:00 when the
    #    clock is between midnight and 05:00)
    cutoff = now.replace(hour=5, minute=0, second=0, microsecond=0)
    if now.hour < 5:
        cutoff = cutoff - datetime.timedelta(days=1)
    if lastRun <= cutoff:
        return runFull(u'Kører fuld opdatering: Første kørsel i dag')

    # 4) A full update was requested through the options
    if config.options.fullupdate:
        return runFull(u'Kører fuld opdatering selvom der ikke forventes '
                       u'nyt. Du bør bruge --quick')

    config.log(u'Kører ikke fuld opdatering - der forventes intet nyt', 1)
    return (False, state)
예제 #25
0
def skoleExamineNews(url, mid):
    '''Fetch the news item at url and pass it to semail.maybeEmail.

    The title is the text of the first h3 on the page; the body is the
    first table nested in the fourth table.  mid becomes the message id.
    '''
    page = surllib.skoleGetURL(url, True)

    headline = page.h3.text
    content = page.findAll('table')[3].table

    msg = semail.Message(u'dialogue', content)
    msg.setMessageID(mid)
    msg.setTitle(headline)
    msg.updatePersonDate()

    semail.maybeEmail(msg)
예제 #26
0
def skoleDocuments(cname):
    '''Documents ("Dokumenter"): scan the document folders of cname.

    For every root folder the main page is processed with
    docFindDocuments.  Sub folders are read from the JSON stored in the
    value of the element with id 'FoldersJson'; entries whose Name starts
    with '$' are skipped.
    '''
    for rootTitle, folder in [('Klassens dokumenter', 'class')]:
        config.clog(cname, u'%s: Kigger efter dokumenter' % rootTitle)
        root = surllib.skoleGetURL(
            schildren.getChildURL(cname, '/documents/' + folder),
            True, MAX_CACHE_AGE)
        docFindDocuments(cname, rootTitle, root, '')

        # Sub folders, if the page describes any
        holder = root.find(id='FoldersJson')
        if not (holder and holder.has_attr('value')):
            continue

        for sub in json.loads(holder['value']):
            if sub[u'Name'].startswith('$'):
                continue

            title = sub[u'Title']
            page = surllib.skoleGetURL(
                sub[u'Url'], True, MAX_CACHE_AGE, None, True)
            docFindDocuments(cname, rootTitle, page, title)
예제 #27
0
def skoleExamineNews(url, mid):
    '''Turn the news page at url into a message with id mid.

    Title: text of the page's first h3.  Body: the table nested inside
    the fourth table of the page.  The message is handed to
    semail.maybeEmail.
    '''
    soup = surllib.skoleGetURL(url, True)

    # Title first, then the main text
    heading = soup.h3.text
    body = soup.findAll('table')[3].table

    message = semail.Message(u'dialogue', body)
    message.setMessageID(mid)
    message.setTitle(heading)
    message.updatePersonDate()

    semail.maybeEmail(message)
예제 #28
0
def findPhotos(cname, bs):
    '''Visit every photo folder listed in the filter of the page bs.

    Options without a value are ignored.  Options whose absolute URL does
    not start with the URL prefix of cname are logged and skipped.  All
    other folders are fetched and passed to findPhotosInFolder.
    '''
    prefix = schildren.getChildURLPrefix(cname)

    for option in bs.select('#sk-photos-toolbar-filter option'):
        if not option.has_attr('value'):
            continue

        raw = option['value']
        url = surllib.absurl(raw)
        folder = option.text.strip()

        if url.startswith(prefix):
            page = surllib.skoleGetURL(url, True, MAX_CACHE_AGE)
            findPhotosInFolder(cname, url, page)
        else:
            config.clog(cname, u'Billeder: %s: ukendt URL %r' %
                        (folder, raw))
예제 #29
0
def findPhotos(cname, bs):
    '''Process the photo folders offered by the filter drop-down in bs.

    Each option with a value is resolved with surllib.absurl.  URLs
    outside the child's URL prefix are reported and skipped; the others
    are downloaded and handed to findPhotosInFolder.
    '''
    childPrefix = schildren.getChildURLPrefix(cname)

    candidates = [o for o in bs.select('#sk-photos-toolbar-filter option')
                  if o.has_attr('value')]
    for option in candidates:
        value = option['value']
        url = surllib.absurl(value)
        folder = option.text.strip()
        if not url.startswith(childPrefix):
            config.clog(
                cname, u'Billeder: %s: ukendt URL %r' % (folder, value))
            continue

        folderPage = surllib.skoleGetURL(url, True, MAX_CACHE_AGE)
        findPhotosInFolder(cname, url, folderPage)
예제 #30
0
def skoleGetChildren():
    '''Return a sorted list with the names of the "available" children.

    The names are read from the links on the page at URL the first time
    (while the module-level _children is empty) and reused afterwards.
    '''
    global _children

    config.log(u'Henter liste af børn')

    if not _children:
        soup = surllib.skoleGetURL(URL, asSoup=True, noCache=True)

        # Map child name (text of the span in the link) to the link target
        _children = {}
        for link in soup.findAll('a'):
            target = link['href']
            label = link.span.text
            _children[label] = target

    return sorted(_children)
예제 #31
0
def diaExamineMessage(url, mid):
    '''Fetch the message at url and pass it on; return msg.maybeSend().

    The body is the first cell of the first row with valign='top'.  The
    header table ('linje1') supplies title, sender/date, recipient and
    CC.  Unknown header lines are logged.
    '''
    page = surllib.skoleGetURL(url, True)

    # Main text
    bodyRow = page.find('tr', valign='top')
    assert bodyRow
    msg = semail.Message(u'dialogue', bodyRow.find('td'))
    msg.setMessageID(mid)

    # Header: must exist and hold at least three rows
    header = page.find('table', 'linje1')
    assert header
    rows = header.findAll('tr')
    assert len(rows) >= 3

    for row in rows:
        text = row.text
        if not text or text.startswith(u'Denne besked slettes'):
            # Empty line or deletion notice: nothing to record
            continue

        if row.find('h4'):
            # Title line
            msg.setTitle(text)
        elif text.startswith((u'Besked fra', u'Oprettet af')):
            # E.g. "Besked fra Frk Nielsen - modtaget den 26-09-2012 20:29:44"
            msg.updatePersonDate(row)
        elif text.startswith(u'Sendt til '):
            # Recipients: everything after the two leading words
            msg.setRecipient(text.split(u' ', 2)[-1])
        elif text.startswith(u'Kopi til '):
            # CC: everything after the two leading words
            msg.setCC(text.split(u' ', 2)[-1])
        else:
            config.log(u'Ukendt header i besked #%s: %s' % (mid, text), -1)

    return msg.maybeSend()
예제 #32
0
def diaExamineMessage(url, mid):
    '''Build a message from the page at url and return msg.maybeSend().

    mid is stored as the message id.  The header rows of the 'linje1'
    table are classified by their text; lines that match nothing are
    logged as unknown.
    '''
    soup = surllib.skoleGetURL(url, True)

    # The message text lives in the first cell of the valign='top' row
    topRow = soup.find('tr', valign='top')
    assert topRow
    msg = semail.Message(u'dialogue', topRow.find('td'))
    msg.setMessageID(mid)

    # The header table is mandatory and has three rows or more
    header = soup.find('table', 'linje1')
    assert header
    headerRows = header.findAll('tr')
    assert len(headerRows) >= 3

    for line in headerRows:
        content = line.text
        if not content:
            continue
        if content.startswith(u'Denne besked slettes'):
            # Deletion notice is ignored
            continue

        if line.find('h4'):
            # Title
            msg.setTitle(content)
        elif content.startswith((u'Besked fra', u'Oprettet af')):
            # Sender and date, e.g.
            # "Besked fra Frk Nielsen - modtaget den 26-09-2012 20:29:44"
            msg.updatePersonDate(line)
        elif content.startswith(u'Sendt til '):
            # "Sendt til ...": recipients
            msg.setRecipient(content.split(u' ', 2)[-1])
        elif content.startswith(u'Kopi til '):
            # "Kopi til ...": copy recipients
            msg.setCC(content.split(u' ', 2)[-1])
        else:
            config.log(u'Ukendt header i besked #%s: %s' % (mid, content), -1)

    return msg.maybeSend()
예제 #33
0
def skoleContactLists():
    '''Request every wanted contact list and process it.

    The form on the page at URL_MAIN is read to build the post data: the
    inputs with id 'fSkjult' and type 'submit', plus the first option of
    the selects 'fKlasse' and 'fSortering'.  The select 'R1' holds the
    available lists; each option found in LISTS_TO_SEND is passed to
    listsCheckList.  The function logs and returns early when the form
    does not look as expected.  The page and the 'R1' select are stored
    in the module globals bs and lists.
    '''
    global bs, lists

    config.log(u'Kigger efter nye adresser')

    # Initial page holding the form
    bs = surllib.skoleGetURL(URL_MAIN, True, False, True)

    postData = {}

    # Mandatory inputs
    for attrs in ({'id': 'fSkjult'}, {'type': 'submit'}):
        field = bs.find('input', **attrs)
        if not field:
            config.log(u'pgContactLists: INPUT med %s ej fundet' % repr(attrs))
            return
        postData[field['name']] = field['value']

    # Selects: two get their first option, one is the list chooser
    lists = None
    for sel in bs.findAll('select'):
        firstValue = sel.option['value']
        selName = sel['name']
        if selName in ('fKlasse', 'fSortering'):
            postData[selName] = firstValue
        elif selName == 'R1':
            lists = sel
        else:
            config.log(u'pgContactLists: Ukendt SELECT: %s' % selName)
            return

    if not lists:
        config.log(u'pgContactLists: SELECT med mulige lister ej fundet')
        return

    for opt in lists.findAll('option'):
        choice = opt['value']
        if choice not in LISTS_TO_SEND:
            continue
        postData[lists['name']] = choice
        listsCheckList(postData, choice)
예제 #34
0
def skoleWeekplans(cname):
    '''Week plans ("Ugeplaner"): send a message per week plan of cname.

    Every link with an href in the 'sk-weekly-plans-list-container' list
    is fetched with getWeekplan.  The last path segment of the link is
    used as message id and the text of the first h3 as title.  When the
    list is missing and the page text contains 'ikke autoriseret', a hint
    about disabling the section is logged.
    '''
    config.clog(cname, u'Kigger efter nye ugeplaner')
    listUrl = schildren.getChildURL(cname, 'item/weeklyplansandhomework/list/')
    page = surllib.skoleGetURL(listUrl, True, noCache=True)

    container = page.find('ul', 'sk-weekly-plans-list-container')
    if not container:
        if u'ikke autoriseret' in page.text:
            config.clog(cname, u'Din skole bruger ikke ugeplaner. '
                        u"Du bør bruge '--section ,-%s'" % SECTION)
        return

    for link in container.find_all('a', href=True):
        planUrl = link['href']
        plan = getWeekplan(cname, planUrl)
        wid = planUrl.split('/')[-1]  # e.g. 35-2018
        title = plan.find('h3').text.strip()

        msg = semail.Message(cname, SECTION, unicode(plan))
        msg.setTitle(title)
        msg.setMessageID(wid)
        msg.maybeSend()
예제 #35
0
def skoleWeekplans(cname):
    '''Week plans ("Ugeplaner"): look for week plans of cname.

    Each linked plan in the 'sk-weekly-plans-list-container' list is
    loaded through getWeekplan and passed to semail, with the last URL
    segment as message id.  Without such a list, a hint is logged if the
    page text contains 'ikke autoriseret'.
    '''
    config.clog(cname, u'Kigger efter nye ugeplaner')
    soup = surllib.skoleGetURL(
        schildren.getChildURL(cname, 'item/weeklyplansandhomework/list/'),
        True, noCache=True)

    planList = soup.find('ul', 'sk-weekly-plans-list-container')
    if planList:
        for anchor in planList.find_all('a', href=True):
            href = anchor['href']
            plan = getWeekplan(cname, href)
            weekId = href.split('/')[-1]  # e.g. 35-2018
            heading = plan.find('h3').text.strip()

            msg = semail.Message(cname, SECTION, unicode(plan))
            msg.setTitle(heading)
            msg.setMessageID(weekId)
            msg.maybeSend()
    elif u'ikke autoriseret' in soup.text:
        config.clog(
            cname, u'Din skole bruger ikke ugeplaner. '
            u"Du bør bruge '--section ,-%s'" % SECTION)
예제 #36
0
def skoleGetChildren():
    '''Return a sorted list with the names of the "available" children.

    While the module-level _children is empty, the links on the page at
    URL are read into it; the entry named SKOLEBESTYRELSE_NAME is left
    out.  Later calls reuse the stored mapping.
    '''
    global _children

    config.log(u'Henter liste af børn')

    if not _children:
        soup = surllib.skoleGetURL(URL, asSoup=True, noCache=True)

        # name (span text of the link) -> href
        _children = {}
        for link in soup.findAll('a'):
            target = link['href']
            label = link.span.text

            if label == SKOLEBESTYRELSE_NAME:
                config.log(u'Ignorerer [%s]' % label)
            else:
                _children[label] = target

    return sorted(_children)
예제 #37
0
def skoleGetChildren():
    """Return a sorted list with the names of the "available" children.

    The mapping from name to href is built from the links on the page at
    URL when the module-level _children is empty.  Names listed in
    NAMES_IGNORE are logged and skipped.
    """
    global _children

    config.log(u"Henter liste af børn")

    if not _children:
        soup = surllib.skoleGetURL(URL, asSoup=True, noCache=True)

        _children = {}
        for link in soup.findAll("a"):
            target = link["href"]
            label = link.span.text

            if label in NAMES_IGNORE:
                config.log(u"Ignorerer [%s]" % label)
            else:
                _children[label] = target

    return sorted(_children)
예제 #38
0
파일: semail.py 프로젝트: dvaske/fskintra
    def asEmail(self):
        '''Return this message as a MIME email object.

        The result is built once and cached in self._email.  The HTML body
        is rendered from self.mp; images referenced by <img> tags are
        embedded as inline parts, on-site links are downloaded and added
        as attachments, and the entries of self.mp['attatchments'] are
        attached as well.  Finally the general headers (Received, Date,
        Subject, From, To and X-skoleintra-*) are set.
        '''
        if self._email:
            return self._email
        self.prepareMessage()
        hostname = socket.getfqdn()  # used below in a few places

        # Work on a copy so the header snippets below do not alter self.mp
        mpp = self.mp.copy()

        def wrapOrZap(key, title, tag=''):
            '''Replace mpp[key] by an HTML snippet, or by '' when empty.

            title, if given, is put in front of the value followed by
            ': '.  tag, if given, wraps the value; only the first word
            of tag is used in the closing tag, so tag may carry
            attributes.
            '''
            if title:
                title += u': '
            val = mpp.get(key, None)
            if val:
                if tag:
                    val = u'<%s>%s</%s>' % (tag, val, tag.split()[0])
                mpp[key] = (u"<span style='font-size: 15px'>"
                            u"%s%s</span><br>\n  ") % (title, val)
            else:
                mpp[key] = ''

        wrapOrZap('sender', '', 'b style="font-size: 17px"')
        wrapOrZap('recipient', 'Til')
        wrapOrZap('cc', 'Kopi til')

        # create initial HTML version
        html = u'''<!DOCTYPE html>
<html lang="da">
<head>
  <meta charset="utf-8">
  <title>%(title)s</title>
</head>
<body style='font-family: Helvetica, sans-serif; font-size: 14px;'>
<h1>%(title)s</h1>
<div class='hd' style='padding:5px;background-color:#eee;margin-bottom:15px;'>
  %(sender)s%(recipient)s%(cc)s<span>%(date_string)s</span>
</div>
<div class='text'>
  %(html)s
</div>
</body>
</html>''' % mpp
        # From here on html is a soup object (findAll/prettify are used)
        html = sbs4.beautify(html)

        # First look for inline images, if any
        # iimgs: mapping from URL to (cid, binary string contents)
        iimgs = {}
        for imgtag in html.findAll('img'):
            if not imgtag.has_attr('src'):
                continue  # ignore
            url = imgtag['src']
            if url.lower().startswith('data:'):
                # ignore 'inline' images
                continue
            elif not url:
                # ignore empty URLs
                continue
            if url not in iimgs:
                # First time this URL is seen: download and register it
                try:
                    data = surllib.skoleGetURL(url, False)
                except urllib2.URLError:
                    # could not fetch URL for some reason - ignore
                    continue
                # is this actually an image?
                if not imghdr.what('', data):
                    continue  # ignore
                cid = 'image%d-%f@%s' % (len(iimgs) + 1, time.time(), hostname)
                iimgs[url] = (cid, data)
            cid, _ = iimgs[url]

            # Point the tag at the embedded copy
            imgtag['src'] = 'cid:' + cid

        # Next, handle attachments
        # attachments: email attachments ready for attachment :)
        attachments = []
        for atag in html.findAll('a'):
            try:
                url = atag['href']
            except KeyError:
                atag.replaceWithChildren()  # kill the "broken" link
                continue
            # NOTE(review): url was already read in the try block above
            url = atag['href']
            if url.startswith('/') or config.options.hostname in url:  # onsite
                data = None
                try:
                    data = surllib.skoleGetURL(url, False)
                except urllib2.URLError:
                    # unable to fetch URL
                    config.log(
                        u'%s: Kan ikke hente flg. URL: %s' %
                        (self.mp['title'] if self.mp['title'] else self, url))
                if data:
                    eatt = generateMIMEAttachment(url, data, None)
                    attachments.append(eatt)
                    atag.replaceWithChildren()  # kill the actual link

        # Attach actual attachments (if any)
        # NOTE(review): the key is spelled 'attatchments'; presumably it
        # matches the spelling used where self.mp is filled -- confirm
        for (url, text) in self.mp['attatchments']:
            data = surllib.skoleGetURL(url, False)
            eatt = generateMIMEAttachment(url, data, text)
            attachments.append(eatt)

        # Now, put the pieces together
        html = html.prettify()
        msgHtml = MIMEText(html, 'html', 'utf-8')
        if not iimgs and not attachments:
            # pure HTML version
            msg = msgHtml
        else:
            # Inline images but no attachments
            #   multipart/related
            #     text/html with html text
            #     image/xxx with inline images
            # OR
            # email with inline images + attachment
            #   multipart/mixed
            #     text/html with the html version
            #     image/gif with the image
            #     application/xxx with word document
            if attachments:
                msg = MIMEMultipart('mixed', type='text/html')
            else:
                msg = MIMEMultipart('related', type='text/html')
            # The MIME-Version header is removed from every sub part
            del msgHtml['MIME-Version']
            msg.attach(msgHtml)

            # Attach images if any
            for (url, (cid, data)) in iimgs.items():
                m = MIMEImage(data)
                m.add_header('Content-ID', '<%s>' % cid)
                fn = niceFilename(url)
                m.add_header('Content-Disposition',
                             'inline',
                             filename=headerEncodeField(fn))

                del m['MIME-Version']
                msg.attach(m)

            # Attach attachments if any
            for attachment in attachments:
                del attachment['MIME-Version']
                msg.attach(attachment)

        # Now, for the general headers
        dt = email.utils.formatdate(time.mktime(self.mp['date_ts']), True)
        msg['Received'] = ('from %s ([127.0.0.1] helo=%s) '
                           'by %s with smtp (fskintra) for %s; %s') % (
                               hostname, hostname, hostname,
                               config.options.email, dt)
        msg['Date'] = dt

        # Subject: the title, prefixed with the children's names if any
        title = self.mp['title']
        if self.mp['children']:
            title = u'[%s] %s' % (', '.join(self.mp['children']), title)
        msg['Subject'] = headerEncodeField(title, 60)
        if 'sender' in self.mp and self.mp['sender']:
            sender = u'Skoleintra - %s' % self.mp['sender']
        else:
            sender = u'Skoleintra'
        sender = '%s <%s>' % (headerEncodeField(sender),
                              config.options.senderemail)
        msg['From'] = sender
        msg['To'] = config.options.email

        # Other tags just for ourselves
        keys = 'mid,md5'.split(',')
        for key in keys:
            if self.mp.get(key, None):
                kkey = 'X-skoleintra-%s' % key
                msg[kkey] = headerEncodeField(self.mp[key], 60)

        # Cache the result for later calls
        self._email = msg
        return msg
예제 #39
0
def parseFrontpageItem(cname, div):
    '''Parse a single frontpage news item and return it as a message.

    div is the element of one news item.  The returned semail.Message
    holds the trimmed body (plus the comments block, when comments were
    fetched), a title taken from the first text line of the body, the
    item id as message id, sender, recipients, time stamp and any
    attachments listed in the item.
    '''
    # Do we have any comments?
    comments = div.find('div', 'sk-news-item-comments')
    cdiv = u''
    if comments:
        # NOTE(review): c is neither assigned nor read in this function
        global c
        # Comments are enabled
        txt = comments.text.strip()
        if u'tilføj' not in txt.lower():
            # The text is expected to state the number of comments
            m = re.match(ur'.*vis (\d+) kommentar.*', txt.lower())
            assert (m)
            nc = int(m.group(1))
            if nc > 0:
                # Fetch the comments and keep their container as HTML
                suff = '/news/pins/%s/comments' % div['data-feed-item-id']
                url = schildren.getChildURL(cname, suff)
                bs = surllib.skoleGetURL(url,
                                         asSoup=True,
                                         postData={'_': str(nc)})
                cdiv = unicode(bs.find('div', 'sk-comments-container'))
                cdiv = u'<br>' + cdiv

    author = div.find('div', 'sk-news-item-author')
    body = div.find('div', 'sk-news-item-content')
    # trim the body a bit
    body = sbs4.copy(body)  # make a copy as we look for attachments later
    for e in body.select('.sk-news-item-footer, .sk-news-item-comments'):
        e.extract()
    for e in body.select('.h-fnt-bd'):
        e['style'] = 'font-weight: bold'
    for e in body.select('div'):
        # remove empty divs
        contents = u''.join(map(unicode, e.children)).strip()
        if not contents:
            e.extract()
    # Trim extra white space - sometimes unnecessary linebreaks are introduced
    sbs4.trimSoup(body)

    # The message text is captured here, before body is simplified further
    msg = semail.Message(cname, SECTION, unicode(body) + cdiv)

    # Simplify the body; it is only used to derive the title from here on
    for e in body.select('span, strong, b, i'):
        e.unwrap()
    sbs4.condenseSoup(body)

    # Title: first text line, with non-breaking spaces replaced, trailing
    # spaces/dots removed and runs of white space collapsed
    title = body.get_text(u'\n', strip=True).strip().split(u'\n')[0]
    title = title.replace(u'\xa0', u' ').strip()
    title = u' '.join(title.rstrip(u' .').split())

    msg.setTitle(title, True)
    msg.setMessageID(div['data-feed-item-id'])
    msg.setSender(author.span.text)

    # Find list of recipients
    author.span.extract()  # Remove author
    for tag in [
            # NOTE(review): the first span was extracted just above, so
            # this is whichever span now comes first, if any
            author.span,
            author.find('span', 'sk-news-item-for'),  # Remove 'til'
            author.find('span', 'sk-news-item-and'),  # Remove ' og '
            author.find('a', 'sk-news-show-more-link')
    ]:
        if tag:
            tag.extract()
    # What is left is split on ',' and ' og ' into a list of recipients
    recp = re.sub(ur'\s*(,| og )\s*', ',', author.text.strip())
    recp = recp.split(u',')
    msg.setRecipient(recp)

    msg.setDateTime(div.find('div', 'sk-news-item-timestamp').text)

    # Do we have any attachments?
    divA = div.find('div', 'sk-attachments-list')
    if divA:
        for att in (divA.findAll('a') or []):
            url = att['href']
            text = att.text.strip()
            msg.addAttachment(url, text)

    return msg
예제 #40
0
def parseMessages(cname, bs):
    '''Look for new messages in each conversation.

    cname: name of the child; passed on to config.clog,
           schildren.getChildURL and msgFromJson.
    bs:    parsed conversation overview page (needs find/findAll).

    Returns a list with one msgFromJson(...) result for every message that
    semail.hasSentMessage does not report as already sent. The list is empty
    if no conversation data could be located on the page.
    '''
    # The conversation list is stored as JSON in a very long attribute of
    # one of the divs inside the content wrapper
    main = bs.find('div', 'sk-l-content-wrapper')
    conversations = None
    for d in (main.findAll('div') if main is not None else []):
        for a in d.attrs:
            if 'message' not in a.lower() or len(d[a]) < 100:
                continue
            try:
                parsed = json.loads(d[a])
            except ValueError:
                continue
            if isinstance(parsed, dict) and parsed.get('Conversations'):
                conversations = parsed['Conversations']
                break
        if conversations:
            # Stop at the first hit so that a later div cannot overwrite
            # the conversations we already found
            break

    if not conversations:
        config.clog(cname, 'Ingen beskeder fundet?!?', -1)
        return []

    emsgs = []
    for i, c in enumerate(conversations):
        # ThreadId can be empty if this is a msg to all students
        tid = c.get('ThreadId') or ''
        # Check the raw value: unicode(None) would be the truthy u'None'
        lmid = c.get('LatestMessageId')
        if lmid is None or lmid == '':
            config.clog(cname, u'Noget galt i tråd #%d %r %r'
                        % (i, tid, lmid), -1)
            continue
        lmid = unicode(lmid)

        if semail.hasSentMessage(tp=SECTION, mid=(tid, lmid)):
            continue

        # This last messages has not been seen - load the entire conversation
        if tid:
            suffix = (
                '/messages/conversations/loadmessagesforselectedconversation' +
                '?threadId=' + tid +
                '&takeFromRootMessageId=' + lmid +
                '&takeToMessageId=0' +
                '&searchRequest=')
        else:
            suffix = (
                '/messages/conversations/getmessageforthreadlessconversation' +
                '?messageId=' + lmid)
        curl = schildren.getChildURL(cname, suffix)
        data = surllib.skoleGetURL(curl, asSoup=False, noCache=True,
                                   addTimeSuffix=True)

        try:
            loaded = json.loads(data)
        except ValueError:
            config.clog(cname,
                        u'Kan ikke indlæse besked-listen i tråd %d %r %r'
                        % (i, tid, lmid), -1)
            continue

        # A thread is loaded as a list of messages; a threadless
        # conversation is loaded as a single message
        msgs = loaded if tid else [loaded]

        assert isinstance(msgs, list)
        # Walk the loaded messages in reverse order
        for m in reversed(msgs):
            mid = unicode(m.get('Id'))
            if semail.hasSentMessage(tp=SECTION, mid=(tid, mid)):
                continue

            # Generate new messages with this content
            emsgs.append(msgFromJson(cname, m, tid))

    return emsgs
예제 #41
0
def docFindDocuments(bs, foldername='Dokumentarkiv'):
    '''Scan one page of the document archive and report new documents.

    bs:         parsed page (BeautifulSoup) listing documents and folders.
    foldername: path of the folder shown on the page; used as a prefix in
                log lines and in the generated message text.

    Every table row with a 'linje...' class is inspected. Sub folders are
    fetched (possibly from the cache) and scanned recursively. For a
    document, a semail.Message is created and handed to maybeSend().
    '''
    for line in bs.findAll('tr'):
        if not line.has_key('class'):
            continue
        if not any(c.startswith('linje') for c in line['class'].split()):
            continue

        links = line.findAll('a')
        assert(len(links) >= 2)

        # find file type (taken from the file name of the icon image)
        ext = links[0].img['src'].split('/')[-1][2:-4].lower()

        # find name of file
        title = links[1].text
        ltitle = foldername + ' / ' + title

        # find url
        url = links[0]['href']
        if 'visDokument' in url:
            # Rebuild the link from the first number found in the href
            url = URL_DOC + re.search(r'.*?(\d+)', url).group(1)
        else:
            assert('Dokliste' in url)
        url = urllib.quote(url.encode('iso-8859-1'), safe=':/?=&%')

        # find date
        dts = line.findAll('td', width='18%')
        assert(len(dts) == 1 and dts[0].text)  # exactly one date
        date = dts[0].text

        # now do stuff
        if 'Dokliste' in url:
            # this is a subfolder

            # first look at (potentially cached version)
            suburl = URL_PREFIX + url
            subbs = surllib.skoleGetURL(suburl, True)

            # The date is written day-month-year
            day, month, year = [int(part) for part in date.split('-')]
            subdate = datetime.date(year, month, day)
            if subbs.cachedate <= subdate or \
               (datetime.date.today() - subbs.cachedate).days > 2:
                # cached version is too old - refetch
                subbs = surllib.skoleGetURL(suburl, True, True)
                config.log(u'Kigger på folderen %s' % title)
            else:
                config.log(u'Kigger på folderen %s (fra cache)' % title)

            docFindDocuments(subbs, ltitle)
        else:
            # this is an actual document
            config.log(u'Kigger på dokumentet %s' % ltitle)

            # Create HTML snippet
            html = u"<p>Nyt dokument: <a href=''>%s</a></p>" % ltitle
            h = surllib.beautify(html)
            h.a['href'] = url
            # Custom attribute carrying the file name for the attachment
            h.a['usefilename'] = title + '.' + ext

            msg = semail.Message('documents', h)
            msg.setTitle(u'%s' % title)
            msg.setDate(date)
            msg.maybeSend()
예제 #42
0
    def asEmail(self):
        '''Return this message as an e-mail object, building it on first use.

        The result is cached in self._email and returned directly on later
        calls. Building the e-mail fetches inline images and on-site links
        through surllib.skoleGetURL and attaches them to the message.
        '''
        if self._email:
            return self._email
        self.prepareMessage()
        hostname = socket.getfqdn()  # used below in a few places

        # Work on a copy so the formatting below does not change self.mp
        mpp = self.mp.copy()

        def wrapOrZap(key, title, tag=''):
            # Replace mpp[key] by an HTML line (optionally prefixed with
            # "title: " and wrapped in tag), or by '' if there is no value
            if title:
                title += u': '
            val = mpp.get(key, None)
            if val:
                if tag:
                    # tag may carry attributes; the closing tag only uses
                    # the first word (the tag name)
                    val = u'<%s>%s</%s>' % (tag, val, tag.split()[0])
                mpp[key] = (u"<span style='font-size: 15px'>"
                            u"%s%s</span><br>\n  ") % (title, val)
            else:
                mpp[key] = ''

        wrapOrZap('sender', '', 'b style="font-size: 17px"')
        wrapOrZap('recipient', 'Til')
        wrapOrZap('cc', 'Kopi til')

        # create initial HTML version
        html = u'''<!DOCTYPE html>
<html lang="da">
<head>
  <meta charset="utf-8">
  <title>%(title)s</title>
</head>
<body style='font-family: Helvetica, sans-serif; font-size: 14px;'>
<h1>%(title)s</h1>
<div class='hd' style='padding:5px;background-color:#eee;margin-bottom:15px;'>
  %(sender)s%(recipient)s%(cc)s<span>%(date_string)s</span>
</div>
<div class='text'>
  %(html)s
</div>
</body>
</html>''' % mpp
        html = sbs4.beautify(html)

        # First look for inline images, if any
        # iimags: mapping from URL to (cid, binary string contents)
        iimgs = {}
        for imgtag in html.findAll('img'):
            if not imgtag.has_attr('src'):
                continue  # ignore
            url = imgtag['src']
            if url.lower().startswith('data:'):
                # ignore 'inline' images
                continue
            elif not url:
                # ignore empty URLs
                continue
            if url not in iimgs:
                # First time we see this URL: fetch it and give it a cid
                try:
                    data = surllib.skoleGetURL(url, False)
                except urllib2.URLError:
                    # could not fetch URL for some reason - ignore
                    continue
                # is this actually an image?
                if not imghdr.what('', data):
                    continue  # ignore
                cid = 'image%d-%f@%s' % (len(iimgs) + 1, time.time(), hostname)
                iimgs[url] = (cid, data)
            cid, _ = iimgs[url]

            # Point the tag at the embedded copy of the image
            imgtag['src'] = 'cid:' + cid

        # Next, handle attachments
        # attachments: email attachments ready for attachment :)
        attachments = []
        for atag in html.findAll('a'):
            try:
                url = atag['href']
            except KeyError:
                atag.replaceWithChildren()  # kill the "broken" link
                continue
            url = atag['href']
            if url.startswith('/') or config.options.hostname in url:  # onsite
                data = None
                try:
                    data = surllib.skoleGetURL(url, False)
                except urllib2.URLError:
                    # unable to fetch URL
                    config.log(u'%s: Kan ikke hente flg. URL: %s' %
                               (self.mp['title'] if self.mp['title'] else self,
                                url))
                if data:
                    eatt = generateMIMEAttachment(url, data, None)
                    attachments.append(eatt)
                    atag.replaceWithChildren()  # kill the actual link

        # Attach actual attachments (if any)
        # NOTE: 'attatchments' (sic) is the key actually used in self.mp
        for (url, text) in self.mp['attatchments']:
            data = surllib.skoleGetURL(url, False)
            eatt = generateMIMEAttachment(url, data, text)
            attachments.append(eatt)

        # Now, put the pieces together
        html = html.prettify()
        msgHtml = MIMEText(html, 'html', 'utf-8')
        if not iimgs and not attachments:
            # pure HTML version
            msg = msgHtml
        else:
            # Inline images but no attachments
            #   multipart/related
            #     text/html with html text
            #     image/xxx with inline images
            # OR
            # email with inline images + attachment
            #   multipart/mixed
            #     text/html med html udgave
            #     image/gif med billede
            #     application/xxx with word document
            if attachments:
                msg = MIMEMultipart('mixed', type='text/html')
            else:
                msg = MIMEMultipart('related', type='text/html')
            # Each part loses its own MIME-Version header before it is
            # attached to the multipart container
            del msgHtml['MIME-Version']
            msg.attach(msgHtml)

            # Attach images if any
            for (url, (cid, data)) in iimgs.items():
                m = MIMEImage(data)
                m.add_header('Content-ID', '<%s>' % cid)
                fn = niceFilename(url)
                m.add_header('Content-Disposition', 'inline',
                             filename=headerEncodeField(fn))

                del m['MIME-Version']
                msg.attach(m)

            # Attach attachments if any
            for attachment in attachments:
                del attachment['MIME-Version']
                msg.attach(attachment)

        # Now, for the general headers
        # The same formatted timestamp is used for both Received and Date
        dt = email.utils.formatdate(time.mktime(self.mp['date_ts']), True)
        msg['Received'] = ('from %s ([127.0.0.1] helo=%s) '
                           'by %s with smtp (fskintra) for %s; %s'
                           ) % (hostname, hostname, hostname,
                                config.options.email, dt)
        msg['Date'] = dt

        title = self.mp['title']
        if self.mp['children']:
            # Prefix the subject with the names of the children
            title = u'[%s] %s' % (', '.join(self.mp['children']), title)
        msg['Subject'] = headerEncodeField(title, 60)
        if 'sender' in self.mp and self.mp['sender']:
            sender = u'Skoleintra - %s' % self.mp['sender']
        else:
            sender = u'Skoleintra'
        sender = '%s <%s>' % (headerEncodeField(sender),
                              config.options.senderemail)
        msg['From'] = sender
        msg['To'] = config.options.email

        # Other tags just for ourselves
        keys = 'mid,md5'.split(',')
        for key in keys:
            if self.mp.get(key, None):
                kkey = 'X-skoleintra-%s' % key
                msg[kkey] = headerEncodeField(self.mp[key], 60)

        # Cache the result for later calls
        self._email = msg
        return msg
예제 #43
0
    def asEmail(self):
        if self._email:
            return self._email
        self.prepareMessage()
        hostname = socket.getfqdn()  # used below in a few places

        mpp = self.mp.copy()

        def wrapOrZap(key, title):
            val = self.mp.get(key, None)
            if val:
                mpp[key] = "<p class='%s' style='margin: 0;'>%s: %s</p>\n"
                mpp[key] %= (key, title, val)
            else:
                mpp[key] = ''

        wrapOrZap('sender', 'Fra')
        wrapOrZap('recipient', 'Til')
        if mpp.get('time', None):
            mpp['ttime'] = u' ' + mpp['time']
        else:
            mpp['ttime'] = u''

        # create initial HTML version
        html = u'''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
  <title>%(title)s</title>
</head>
<body style='font-family: Verdana,Arial,Helvetica'>
<h1>%(title)s</h1>
<div class='meta' style='background-color: #eaeaea; color: #000; padding: 5px; margin: 0 0 10px 0;'>
%(sender)s%(recipient)s  <p class='date' style='margin: 0;'>Dato: %(date)s%(ttime)s</p>
</div>
<div class='text'>
  %(nicehtml)s
</div>
</body>
</html>
'''
        html %= mpp
        html = BeautifulSoup.ICantBelieveItsBeautifulSoup(html)

        # first look for inline images (if any)
        # iimags: mapping from URL to (cid, binary string contents)
        iimgs = {}
        for imgtag in html.findAll('img'):
            url = imgtag['src']
            if url.lower().startswith('data:'):
                # ignore 'inline' images
                continue
            elif not url:
                # ignore empty URLs
                continue
            if url not in iimgs:
                try:
                    data = surllib.skoleGetURL(url, False)
                except urllib2.URLError, e:
                    # could not fetch URL for some reason - ignore
                    continue
                # is this actually an image?
                if not imghdr.what('', data):
                    continue  # ignore
                cid = 'image%d-%f@%s' % (len(iimgs) + 1, time.time(), hostname)
                iimgs[url] = (cid, data)
            cid, _ = iimgs[url]

            imgtag['src'] = 'cid:' + cid
예제 #44
0
def getWeekplan(cname, url):
    '''Fetch the page at url (with noCache=True) and format it as a week plan.

    cname is accepted but not used by this function.
    '''
    return formatWeekplan(surllib.skoleGetURL(url, True, noCache=True))
예제 #45
0
def getWeekplan(cname, url):
    '''Return formatWeekplan() of the page found at url.

    The page is requested as soup with noCache=True; cname is not used.
    '''
    page = surllib.skoleGetURL(url, True, noCache=True)
    weekplan = formatWeekplan(page)
    return weekplan
예제 #46
0
class Message:
    '''A single Skoleintra item that can be stored on disk and e-mailed.

    All fields are kept in the dict self.mp. self._email caches the result
    of asEmail().
    '''

    def __init__(self, type, phtml):
        '''Create a message of the given type from a piece of HTML.

        type:  kind of message (e.g. frontpage)
        phtml: the HTML; str(phtml) must be UTF-8 encoded
        '''
        self.mp = {}

        self.mp['type'] = type  # frontpage or ...
        self.mp['phtml'] = phtml  # prefer mp['data'] in general
        self.mp['data'] = str(phtml).decode('utf-8')
        self.mp['childname'] = config.CHILDNAME

        # not set by constructor
        self.mp['title'] = None
        self.mp['date'] = None
        self.mp['time'] = None
        self.mp['sender'] = None
        self.mp['recipient'] = None
        self.mp['mid'] = None
        self._email = None

    def __repr__(self):
        '''Short description listing the fields that have a value'''
        txt = u'<semail.Message'
        keys = 'type,mid,date,time,title,sender'.split(',')
        for key in keys:
            if key in self.mp and self.mp[key]:
                txt += u' %s=%s' % (key, repr(self.mp[key]))
        txt += u'>'
        return txt

    def setTitle(self, title, shorten=False):
        '''Set the title. If shorten is set, a title longer than 40
        characters is cut at the first space after character 40 and
        '...' is appended'''
        if shorten and len(title) > 40:
            title = title[:40] + title[40:].split(' ', 1)[0] + '...'
        self.mp['title'] = title

    def setDate(self, date):
        '''Set the date; "<date> <time>" sets the time as well'''
        date = date.strip()
        if ' ' in date:  # also time
            # Local is not called "time" to avoid shadowing the module
            date, tm = date.split()
            self.setTime(tm)
        self.mp['date'] = date

    def setTime(self, time):
        self.mp['time'] = time

    def setSender(self, sender):
        self.mp['sender'] = sender

    def setRecipient(self, recipient):
        self.mp['recipient'] = recipient

    def setMessageID(self, mid):
        self.mp['mid'] = mid

    def updatePersonDate(self, phtml=None):
        '''Look for "written by X on DATE" style text and set sender and
        date from it. Searches phtml if given, otherwise the message data.
        Logs a note if nothing is found.'''
        if phtml:
            d = phtml.renderContents().decode('utf-8')
        else:
            d = self.mp['data']
        assert isinstance(d, unicode)  # must be unicode

        # e.g. front page pics
        m = re.findall(u'>(?:Lagt ind|Skrevet) af ([^<]*?) den ([-0-9]*?)<', d)
        if m:
            m = m[-1]  # use the last match
            self.setSender(m[0])
            self.setDate(m[1])
            return

        m = re.findall(
            u'(?s)<small>Besked fra([^<]*?) - (?:modtaget|sendt) '
            u'den ([^<]*?)</small>', d)
        if not m:
            m = re.findall(
                u'(?s)<small>Oprettet af([^<]*?) '
                u'den ([^<]*?)</small>', d)

        if m:
            m = m[0]  # use the first match
            self.setSender(m[0].strip())
            self.setDate(m[1].strip())
        else:
            # neither Sender nor date/time found
            config.log('No sender found', 2)

    def prepareMessage(self):
        '''Fill in the derived fields md5, date and nicehtml if missing'''
        # add missing fields, if any

        if not self.mp.get('md5', None):
            keys = 'type,date,title,data'.split(',')
            txt = u' '.join([self.mp[x] for x in keys if self.mp.get(x, None)])
            self.mp['md5'] = unicode(md5.md5(txt.encode('utf-8')).hexdigest())

        if not self.mp.get('date', None):
            # use today as the date
            self.setDate(time.strftime('%d-%m-%Y'))  # today

        # create nice version of the raw html
        if 'nicehtml' not in self.mp:
            self.mp['nicehtml'] = nicehtml(self.mp['data'])

    def getMessageID(self):
        '''Return the explicit message id, or else the md5 of the message'''
        if self.mp.get('mid', None):
            return self.mp['mid']
        else:
            self.prepareMessage()
            return self.mp['md5']

    def getLongMessageID(self):
        '''Return "<date reversed>--<message id>", e.g. 2019-04-03--abc'''
        if not self.mp.get('date', None):
            # With an explicit mid, getMessageID() does not fill in the
            # date, so make sure it is there before splitting it
            self.prepareMessage()
        dt = '-'.join(reversed(self.mp['date'].split('-')))
        return '%s--%s' % (dt, self.getMessageID())

    def hasBeenSent(self):
        ''' Tests whether this email has previously been sent'''
        mid = self.getMessageID()
        old = glob.glob(os.path.join(config.MSG_DN, '*--%s' % mid))
        return old

    def store(self):
        '''Write the message to a directory below config.MSG_DN.

        Returns False if the directory already exists, True otherwise.'''
        mid = self.getMessageID()
        dn = os.path.join(config.MSG_DN, self.getLongMessageID())
        if os.path.isdir(dn):
            # already stored - ignore!
            return False
        # Fill a temporary directory first and rename it when complete
        tdn = dn + '.tmp'
        if os.path.isdir(tdn):
            config.log('Removing previous temporary directory %s' % repr(tdn),
                       2)
            shutil.rmtree(tdn)  # Remove stuff
        os.mkdir(tdn)

        # "with" closes the files even if rendering or writing fails
        with open(os.path.join(tdn, mid + '.eml'), 'wb') as fd:
            fd.write(str(self.asEmail()))

        mpp = [(unicode(k), unicode(v)) for (k, v) in self.mp.items()]
        with codecs.open(os.path.join(tdn, mid + '.txt'), 'wb', 'utf-8') as fd:
            fd.write(repr(mpp))

        os.rename(tdn, dn)
        return True

    def asEmail(self):
        '''Return this message as an e-mail object, building it on first use.

        The result is cached in self._email. Inline images and links to the
        site itself are fetched via surllib.skoleGetURL and embedded in /
        attached to the e-mail.
        '''
        if self._email:
            return self._email
        self.prepareMessage()
        hostname = socket.getfqdn()  # used below in a few places

        mpp = self.mp.copy()

        def wrapOrZap(key, title):
            # Replace mpp[key] by a labelled <p> line, or by '' if empty
            val = self.mp.get(key, None)
            if val:
                mpp[key] = ("<p class='%s' style='margin: 0;'>%s: %s</p>\n"
                            % (key, title, val))
            else:
                mpp[key] = ''

        wrapOrZap('sender', 'Fra')
        wrapOrZap('recipient', 'Til')
        if mpp.get('time', None):
            mpp['ttime'] = u' ' + mpp['time']
        else:
            mpp['ttime'] = u''

        # create initial HTML version
        html = u'''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
  <title>%(title)s</title>
</head>
<body style='font-family: Verdana,Arial,Helvetica'>
<h1>%(title)s</h1>
<div class='meta' style='background-color: #eaeaea; color: #000; padding: 5px; margin: 0 0 10px 0;'>
%(sender)s%(recipient)s  <p class='date' style='margin: 0;'>Dato: %(date)s%(ttime)s</p>
</div>
<div class='text'>
  %(nicehtml)s
</div>
</body>
</html>
'''
        html %= mpp
        html = BeautifulSoup.ICantBelieveItsBeautifulSoup(html)

        # first look for inline images (if any)
        # iimags: mapping from URL to (cid, binary string contents)
        iimgs = {}
        for imgtag in html.findAll('img'):
            # An <img> without a src attribute is skipped instead of
            # raising KeyError
            url = imgtag.get('src')
            if not url or url.lower().startswith('data:'):
                # ignore empty URLs and 'inline' images
                continue
            if url not in iimgs:
                try:
                    data = surllib.skoleGetURL(url, False)
                except urllib2.URLError:
                    # could not fetch URL for some reason - ignore
                    continue
                # is this actually an image?
                if not imghdr.what('', data):
                    continue  # ignore
                cid = 'image%d-%f@%s' % (len(iimgs) + 1, time.time(), hostname)
                iimgs[url] = (cid, data)
            cid, _ = iimgs[url]

            imgtag['src'] = 'cid:' + cid

        # next, handle attachments
        # attachments: email attachments ready for attachment :)
        attachments = []
        for atag in html.findAll('a'):
            try:
                url = atag['href']
            except KeyError:
                atag.replaceWithChildren()  # kill the "broken" link
                continue
            if 'Tilmelding/Oversigt.asp' in url:
                atag.replaceWithChildren()  # kill link
                continue
            if url.startswith('/') or config.HOSTNAME in url:  # onsite!
                data = None
                try:
                    data = surllib.skoleGetURL(url, False)
                except Exception:
                    # unable to fetch URL (KeyboardInterrupt and SystemExit
                    # are deliberately not swallowed here)
                    config.log(
                        u'%s: Kan ikke hente flg. URL: %s' %
                        (self.mp['title'] if self.mp['title'] else self, url))
                if data:
                    # None unless the link names the file explicitly
                    usefilename = atag.get('usefilename')
                    eatt = generateMIMEAttachment(url, data, usefilename)
                    attachments.append(eatt)
                    atag.replaceWithChildren()  # kill the actual link

        # now, put the pieces together
        html = html.prettify().decode('utf-8')
        msgHtml = MIMEText(html, 'html', 'utf-8')
        if not iimgs and not attachments:
            # pure HTML version
            msg = msgHtml
        else:
            # inline images but no attachments
            #   multipart/related
            #     text/html with html text
            #     image/xxx with inline images
            # OR
            # email with inline images + attachment
            #   multipart/mixed
            #     text/html with the html version
            #     image/gif with an image
            #     application/xxx with word document
            if attachments:
                msg = MIMEMultipart('mixed', type='text/html')
            else:
                msg = MIMEMultipart('related', type='text/html')
            # Each part loses its own MIME-Version header before it is
            # attached to the multipart container
            del msgHtml['MIME-Version']
            msg.attach(msgHtml)

            # attach images if any
            for (url, (cid, data)) in iimgs.items():
                m = MIMEImage(data)
                m.add_header('Content-ID', '<%s>' % cid)
                fn = os.path.basename(url).encode('utf-8')
                m.add_header('Content-Disposition',
                             'inline',
                             filename=('utf-8', '', fn))

                del m['MIME-Version']
                msg.attach(m)

            # attach attachments if any
            for attachment in attachments:
                del attachment['MIME-Version']
                msg.attach(attachment)

        # now for the general headers
        dt = self.mp['date']
        if self.mp.get('time', None):
            dt += ' ' + self.mp['time']
        elif dt == time.strftime('%d-%m-%Y'):  # today
            # No time known: use the current time, but at most 12:00:00
            ts = time.strftime('%H:%M:%S')
            if ts > '12:00:00':
                ts = '12:00:00'
            dt += ' ' + ts
        else:
            dt += ' 12:00:00'
        dt = time.strptime(dt, '%d-%m-%Y %H:%M:%S')
        dt = email.utils.formatdate(time.mktime(dt), True)
        msg['Received'] = ('from %s ([127.0.0.1] helo=%s) '
                           'by %s with smtp (fskintra) for %s; %s') % (
                               hostname, hostname, hostname, config.EMAIL, dt)
        msg['Date'] = dt

        title = self.mp['title']
        if self.mp['childname']:
            title = u'[%s] %s' % (self.mp['childname'], title)
        msg['Subject'] = Header(title, 'utf-8', 60)
        if 'sender' in self.mp and self.mp['sender']:
            sender = u'Skoleintra - %s' % self.mp['sender']
        else:
            sender = u'Skoleintra'
        sender = headerEncodeField(sender) + u' <%s>' % config.SENDER
        msg['From'] = sender
        msg['To'] = config.EMAIL

        # other tags just for ourselves
        keys = 'mid,md5'.split(',')
        for key in keys:
            if self.mp.get(key, None):
                kkey = 'X-skoleintra-%s' % key
                msg[kkey] = Header(self.mp[key], 'utf-8', header_name=kkey)

        self._email = msg
        return msg
예제 #47
0
def parseFrontpageItem(cname, div):
    '''Parse a single frontpage news item.

    cname: name of the child the item belongs to.
    div:   the tag holding the news item; it must carry the attribute
           data-feed-item-id and contain the author, content and
           timestamp divs.

    Returns a semail.Message with title, message id, sender, recipients,
    timestamp and attachments filled in. If the item has comments, they are
    fetched and appended to the message body.
    '''
    # Do we have any comments?
    comments = div.find('div', 'sk-news-item-comments')
    cdiv = u''
    if comments:
        # Comments are enabled
        txt = comments.text.strip()
        if u'tilføj' not in txt.lower():
            m = re.match(r'.*vis (\d+) kommentar.*', txt.lower())
            assert(m)
            nc = int(m.group(1))
            if nc > 0:
                suff = '/news/pins/%s/comments' % div['data-feed-item-id']
                url = schildren.getChildURL(cname, suff)
                bs = surllib.skoleGetURL(url, asSoup=True,
                                         postData={'_': str(nc)})
                container = bs.find('div', 'sk-comments-container')
                if container is not None:
                    # Without this check a missing container would put
                    # the text "None" into the message
                    cdiv = u'<br>' + unicode(container)

    author = div.find('div', 'sk-news-item-author')
    body = div.find('div', 'sk-news-item-content')
    # trim the body a bit
    body = sbs4.copy(body)  # make a copy as we look for attachments later
    for e in body.select('.sk-attachments-list, .sk-news-item-comments'):
        e.extract()
    for e in body.select('.h-fnt-bd'):
        e['style'] = 'font-weight: bold'
    for e in body.select('div'):
        # remove empty divs
        contents = u''.join(map(unicode, e.children)).strip()
        if not contents:
            e.extract()
    # Trim extra white space - sometimes unnecessary linebreaks are introduced
    sbs4.trimSoup(body)

    # The message is created before the body is simplified any further;
    # the steps below are only there to find a title
    msg = semail.Message(cname, SECTION, unicode(body) + cdiv)

    for e in body.select('span, strong, b, i'):
        e.unwrap()
    sbs4.condenseSoup(body)

    # The title is the first line of text, with white space normalised and
    # trailing spaces/full stops removed
    title = body.get_text(u'\n', strip=True).strip().split(u'\n')[0]
    title = title.replace(u'\xa0', u' ').strip()
    title = u' '.join(title.rstrip(u' .').split())

    msg.setTitle(title, True)
    msg.setMessageID(div['data-feed-item-id'])
    msg.setSender(author.span.text)

    # Find list of recipients
    author.span.extract()  # Remove author
    for tag in [
            author.span,  # First remaining span, if any
            author.find('span', 'sk-news-item-for'),  # Remove 'til'
            author.find('span', 'sk-news-item-and'),  # Remove ' og '
            author.find('a', 'sk-news-show-more-link')]:
        if tag:
            tag.extract()
    recp = re.sub(r'\s*(,| og )\s*', ',', author.text.strip())
    recp = recp.split(u',')
    msg.setRecipient(recp)

    msg.setDateTime(div.find('div', 'sk-news-item-timestamp').text)

    # Do we have any attachments?
    divA = div.find('div', 'sk-attachments-list')
    if divA:
        for att in (divA.findAll('a') or []):
            url = att['href']
            text = att.text.strip()
            msg.addAttachment(url, text)

    return msg
예제 #48
0
파일: semail.py 프로젝트: legart/fskintra
    def asEmail(self):
        if self._email:
            return self._email
        self.prepareMessage()
        hostname = socket.getfqdn()  # used below in a few places

        mpp = self.mp.copy()

        def wrapOrZap(key, title):
            val = self.mp.get(key, None)
            if val:
                mpp[key] = "<p class='%s' style='margin: 0;'>%s: %s</p>\n"
                mpp[key] %= (key, title, val)
            else:
                mpp[key] = ''

        wrapOrZap('sender', 'Fra')
        wrapOrZap('recipient', 'Til')
        if mpp.get('time', None):
            mpp['ttime'] = u' ' + mpp['time']
        else:
            mpp['ttime'] = u''

        # create initial HTML version
        html = u'''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
  <title>%(title)s</title>
</head>
<body style='font-family: Verdana,Arial,Helvetica'>
<h1>%(title)s</h1>
<div class='meta' style='background-color: #eaeaea; color: #000; padding: 5px; margin: 0 0 10px 0;'>
%(sender)s%(recipient)s  <p class='date' style='margin: 0;'>Dato: %(date)s%(ttime)s</p>
</div>
<div class='text'>
  %(nicehtml)s
</div>
</body>
</html>
'''
        html %= mpp
        html = BeautifulSoup.ICantBelieveItsBeautifulSoup(html)

        # first look for inline images (if any)
        # iimags: mapping from URL to (cid, binary string contents)
        iimgs = {}
        for imgtag in html.findAll('img'):
            url = imgtag['src']
            if url.lower().startswith('data:'):
                # ignore 'inline' images
                continue
            elif not url:
                # ignore empty URLs
                continue
            if url not in iimgs:
                try:
                    data = surllib.skoleGetURL(url, False)
                except urllib2.URLError, e:
                    # could not fetch URL for some reason - ignore
                    continue
                # is this actually an image?
                if not imghdr.what('', data):
                    continue  # ignore
                cid = 'image%d-%f@%s' % (len(iimgs) + 1, time.time(), hostname)
                iimgs[url] = (cid, data)
            cid, _ = iimgs[url]

            imgtag['src'] = 'cid:' + cid
예제 #49
0
def skoleFrontpage():
    """Process the front page and dispatch each of its sections.

    Logs in, fetches an uncached copy of Forside.asp and, if the browser
    ended up on Personoplysninger.asp instead, confirms the personal data
    and fetches the front page again.  The tables of the single main table
    are then grouped by title and every group is handed to the handler that
    matches its title.

    Raises AssertionError if the page does not contain exactly one main
    table, or if the cover picture section does not hold exactly one table.
    """
    surllib.skoleLogin()

    config.log('Behandler forsiden')

    url = 'http://%s/Infoweb/Fi2/Forside.asp' % config.HOSTNAME
    data = surllib.skoleGetURL(url, asSoup=True, noCache=True)

    br = surllib.getBrowser()
    aurl = br.geturl()
    if u'Personoplysninger.asp' in aurl:
        # We are actually asked to confirm our personal data
        config.log(u'Bekræfter først vores personlige data')
        skoleConfirmPersonalData(data)

        data = surllib.skoleGetURL(url, asSoup=True, noCache=True)

    # find main table: a top-level table without a bgcolor attribute that is
    # not just the short page title
    maint = []
    for mt in data.findAll('table'):
        if mt.findParents('table') or mt.has_key('bgcolor'):
            continue
        txt = mt.text
        if len(txt) < 30 and txt.lower().startswith(u'forældreintra for '):
            continue  # just the title
        maint.append(mt)
    assert(len(maint) == 1)  # assume exactly one main table

    maint = maint[0]

    # find interesting table tags (non-empty tables below the main table)
    itags = []
    for tag in maint:
        for ttag in tag.findAll('table'):
            if ttag.text:
                itags.append(ttag)

    # group the tables as (title, [tables following that title])
    g = []
    for itag in itags:
        t = _getTitle(itag)
        if t is None:
            # not a title
            if not g:
                # In some cases (slideshows), the real title may be missing
                g.append((itags[0].text, []))
            g[-1][1].append(itag)
        else:
            # we have a new title
            g.append((t, []))

    for (t, xs) in g:
        ignore = len(xs) == 0 or t in TITLE_IGNORE
        config.log(u'Kategori [%s]%s' %
                   (t, ' (hoppes over)' if ignore else ''))
        if ignore:
            continue

        if t == TITLE_COVERPIC:
            assert(len(xs) == 1)  # exactly one cover picture
            skoleCoverPic(xs[0])
        elif t == TITLE_BBB:
            # BBB news are split
            # ignore first table which is a wrapper around all entries.
            # An explicit loop is used instead of map(): the calls are made
            # purely for their side effects, and a lazy map() would never
            # make them.
            for x in xs[1:]:
                skoleFrontBBB(x)
        elif t == TITLE_NEWS:
            # News from...
            skoleNewsFrom(xs)
        else:
            # send msg if something has changed
            for x in xs:
                skoleOtherStuff(t, x)
예제 #50
0
파일: semail.py 프로젝트: webjay/fskintra
    def asEmail(self):
        if self._email:
            return self._email
        self.prepareMessage()
        hostname = socket.getfqdn()  # used below in a few places

        mpp = self.mp.copy()

        def wrapOrZap(key, title):
            val = self.mp.get(key, None)
            if val:
                mpp[key] = "<p class='%s' style='margin: 0;'>%s: %s</p>\n"
                mpp[key] %= (key, title, val)
            else:
                mpp[key] = ''

        wrapOrZap('sender', 'Fra')
        wrapOrZap('recipient', 'Til')
        wrapOrZap('cc', 'Kopi til')
        if mpp.get('time', None):
            mpp['ttime'] = u' ' + mpp['time']
        else:
            mpp['ttime'] = u''

        # create initial HTML version
        html = u'''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
  <title>%(title)s</title>
</head>
<body style='font-family: Verdana,Arial,Helvetica'>
<h1>%(title)s</h1>
<div class='meta' style='background-color: #eaeaea; color: #000; padding: 5px; margin: 0 0 10px 0;'>
%(sender)s%(recipient)s%(cc)s  <p class='date' style='margin: 0;'>Dato: %(date)s%(ttime)s</p>
</div>
<div class='text'>
  %(nicehtml)s
</div>
</body>
</html>
'''
        html %= mpp
        html = BeautifulSoup.ICantBelieveItsBeautifulSoup(html)

        # first look for inline images (if any)
        # iimags: mapping from URL to (cid, binary string contents)
        iimgs = {}
        for imgtag in html.findAll('img'):
            if not imgtag.has_key('src'):
                continue  # ignore
            url = imgtag['src']
            if url.lower().startswith('data:'):
                # ignore 'inline' images
                continue
            elif not url:
                # ignore empty URLs
                continue
            if url not in iimgs:
                try:
                    data = surllib.skoleGetURL(url, False)
                except urllib2.URLError:
                    # could not fetch URL for some reason - ignore
                    continue
                # is this actually an image?
                if not imghdr.what('', data):
                    continue  # ignore
                cid = 'image%d-%f@%s' % (len(iimgs) + 1, time.time(), hostname)
                iimgs[url] = (cid, data)
            cid, _ = iimgs[url]

            imgtag['src'] = 'cid:' + cid

        # next, handle attachments
        # attachments: email attachments ready for attachment :)
        attachments = []
        for atag in html.findAll('a'):
            try:
                url = atag['href']
            except KeyError:
                atag.replaceWithChildren()  # kill the "broken" link
                continue
            url = atag['href']
            if 'Tilmelding/Oversigt.asp' in url:
                atag.replaceWithChildren()  # kill link
                continue
            if url.startswith('/') or config.HOSTNAME in url:  # onsite!
                data = None
                try:
                    data = surllib.skoleGetURL(url, False)
                except:
                    # unable to fetch URL
                    config.log(
                        u'%s: Kan ikke hente flg. URL: %s' %
                        (self.mp['title'] if self.mp['title'] else self, url))
                if data:
                    if atag.has_key('usefilename'):
                        usefilename = atag['usefilename']
                    else:
                        usefilename = None
                    eatt = generateMIMEAttachment(url, data, usefilename)
                    attachments.append(eatt)
                    atag.replaceWithChildren()  # kill the actual link

        # now, put the pieces together
        html = html.prettify().decode('utf-8')
        msgHtml = MIMEText(html, 'html', 'utf-8')
        if not iimgs and not attachments:
            # pure HTML version
            msg = msgHtml
        else:
            # inline images but no attachments
            #   multipart/related
            #     text/html with html text
            #     image/xxx with inline images
            # OR
            # email with inline images + attachment
            #   multipart/mixed
            #     text/html med html udgave
            #     image/gif med billede
            #     application/xxx with word document
            if attachments:
                msg = MIMEMultipart('mixed', type='text/html')
            else:
                msg = MIMEMultipart('related', type='text/html')
            del msgHtml['MIME-Version']
            msg.attach(msgHtml)

            # attach images if any
            for (url, (cid, data)) in iimgs.items():
                m = MIMEImage(data)
                m.add_header('Content-ID', '<%s>' % cid)
                fn = os.path.basename(url).encode('utf-8')
                m.add_header('Content-Disposition',
                             'inline',
                             filename=('utf-8', '', fn))

                del m['MIME-Version']
                msg.attach(m)

            # attach attachments if any
            for attachment in attachments:
                del attachment['MIME-Version']
                msg.attach(attachment)

        # now for the general headers
        dt = self.mp['date']
        if self.mp.get('time', None):
            dt += ' ' + self.mp['time']
        else:
            if dt == time.strftime('%d-%m-%Y'):  # today
                ts = time.strftime('%H:%M:%S')
                if ts > '12:00:00':
                    ts = '12:00:00'
                dt += ' ' + ts
            else:
                dt += ' 12:00:00'
        dt = time.strptime(dt, '%d-%m-%Y %H:%M:%S')
        dt = email.utils.formatdate(time.mktime(dt), True)
        msg['Received'] = ('from %s ([127.0.0.1] helo=%s) '
                           'by %s with smtp (fskintra) for %s; %s') % (
                               hostname, hostname, hostname, config.EMAIL, dt)
        msg['Date'] = dt

        title = self.mp['title']
        if self.mp['childname']:
            title = u'[%s] %s' % (self.mp['childname'], title)
        msg['Subject'] = Header(title, 'utf-8', 60)
        if 'sender' in self.mp and self.mp['sender']:
            sender = u'Skoleintra - %s' % self.mp['sender']
        else:
            sender = u'Skoleintra'
        sender = headerEncodeField(sender) + u' <%s>' % config.SENDER
        msg['From'] = sender
        msg['To'] = config.EMAIL

        # other tags just for ourselves
        keys = 'mid,md5'.split(',')
        for key in keys:
            if self.mp.get(key, None):
                kkey = 'X-skoleintra-%s' % key
                msg[kkey] = Header(self.mp[key], 'utf-8', header_name=kkey)

        self._email = msg
        return msg
예제 #51
0
def docFindDocuments(bs, foldername="Dokumentarkiv"):
    """Walk a page of documents and send a message for each document.

    bs: BeautifulSoup object with the content of a page of documents.
    foldername: display name of the folder the page represents; it is used
        as prefix in log lines and in the generated message text.

    Every table row whose class contains a word starting with "linje" is
    examined.  Rows that link to a sub folder ("Dokliste" in the URL) are
    fetched (from cache when it is fresh enough) and handled recursively;
    all other rows are turned into a "documents" message on which
    maybeSend() is called.

    Raises AssertionError when a row does not look as expected (fewer than
    two links, an unrecognised link, or not exactly one date cell).
    """

    trs = bs.findAll("tr")

    for line in trs:
        if not line.has_key("class"):
            continue
        if not [c for c in line["class"].split() if c.startswith("linje")]:
            continue

        links = line.findAll("a")
        assert len(links) >= 2

        # find file type (taken from the file name of the icon image)
        ext = links[0].img["src"].split("/")[-1][2:-4].lower()

        # find name of file
        title = links[1].text
        ltitle = foldername + " / " + title

        # find url
        url = links[0]["href"]
        config.log(u"Kigger på dokument url: %s" % url, 3)
        m = re.match(r"javascript:visdokument\((\d+),'([^']+)'\).*", url)
        if m:
            url = m.group(2)
        elif "visdokument" in url.lower():
            # raw string: "\d" in a plain string is an invalid escape
            url = URL_DOC + re.search(r".*?(\d+)", links[0]["href"]).group(1)
        elif links[0].has_key("onclick") and "visdok" in links[0]["onclick"]:
            pass  # href is actually the file url
        else:
            assert "Dokliste" in url
        url = urllib.quote(url.encode("iso-8859-1"), safe=":/?=&%")

        # find date
        dts = line.findAll("td", width="18%")
        assert len(dts) == 1 and dts[0].text  # exactly one date
        date = dts[0].text

        # now do stuff
        if "Dokliste" in url:
            # this is a subfolder

            # first look at (potentially cached version)
            suburl = URL_PREFIX + url
            subbs = surllib.skoleGetURL(suburl, True)

            # date is written day-month-year; datetime.date wants the
            # reverse order.  A list is built explicitly so this does not
            # depend on map() returning a sequence.
            subdate = datetime.date(
                *[int(part) for part in reversed(date.split("-"))])
            if subbs.cachedate <= subdate or subbs.cacheage >= 1.9:
                # cached version is too old - refetch
                subbs = surllib.skoleGetURL(suburl, True, True)
                config.log(u"Kigger på folderen %s" % title)
            else:
                config.log(u"Kigger på folderen %s (fra cache)" % title)

            docFindDocuments(subbs, ltitle)
        else:
            # this is an actual document
            config.log(u"Kigger på dokumentet %s" % ltitle)

            # Create HTML snippet
            html = u"<p>Nyt dokument: <a href=''>%s</a></p>" % ltitle
            h = surllib.beautify(html)
            h.a["href"] = url
            h.a["usefilename"] = title + "." + ext

            msg = semail.Message("documents", h)
            msg.setTitle(u"%s" % title)
            msg.setDate(date)
            msg.maybeSend()
예제 #52
0
def parseMessages(cname, bs):
    '''Look for new messages in each conversation

    cname: name of the child; passed on to logging, to
        schildren.getChildURL and to msgFromJson.
    bs: parsed page holding the list of conversations.

    Returns a list with the result of msgFromJson for every message that
    semail.hasSentMessage does not report as already sent.  An empty list
    is returned (and an error is logged) when no conversations are found.
    '''
    # Look for a div with a very long attribute with json
    main = bs.find('div', 'sk-l-content-wrapper')
    conversations = None
    if main is not None:
        for d in main.findAll('div'):
            for a in d.attrs:
                if 'message' not in a.lower() or len(d[a]) < 100:
                    continue
                try:
                    jsn = json.loads(d[a])
                except ValueError:
                    continue
                if isinstance(jsn, dict):
                    conversations = jsn.get('Conversations')
                    break
            if conversations:
                # Stop at the first div that gave us conversations, so a
                # later div cannot overwrite what was found
                break

    if not conversations:
        config.clog(cname, 'Ingen beskeder fundet?!?', -1)
        return []

    emsgs = []
    for i, c in enumerate(conversations):
        tid = c.get('ThreadId')
        # Convert only real values: unicode(None) would give the truthy
        # string u'None' and defeat the check for a missing id below
        rawLmid = c.get('LatestMessageId')
        lmid = u'' if rawLmid is None else unicode(rawLmid)
        if not tid:
            # ThreadId can be empty if this is a msg to all students
            tid = ''
        if not lmid:
            config.clog(cname, u'Noget galt i tråd #%d %r %r' % (i, tid, lmid),
                        -1)
            continue

        if semail.hasSentMessage(tp=SECTION, mid=(tid, lmid)):
            continue

        # This last messages has not been seen - load the entire conversation
        if tid:
            suffix = (
                '/messages/conversations/loadmessagesforselectedconversation' +
                '?threadId=' + tid + '&takeFromRootMessageId=' + lmid +
                '&takeToMessageId=0' + '&searchRequest=')
        else:
            suffix = (
                '/messages/conversations/getmessageforthreadlessconversation' +
                '?messageId=' + lmid)
        curl = schildren.getChildURL(cname, suffix)
        data = surllib.skoleGetURL(curl,
                                   asSoup=False,
                                   noCache=True,
                                   addTimeSuffix=True)

        try:
            jsn = json.loads(data)
        except ValueError:
            config.clog(
                cname, 'Kan ikke indlæse besked-listen i tråd %d %r %r' %
                (i, tid, lmid), -1)
            continue

        # With a thread id a list of messages is expected; without one the
        # reply is treated as a single message
        msgs = jsn if tid else [jsn]

        assert isinstance(msgs, list)
        # walk the messages in reverse order of how they were returned
        for jsn in msgs[::-1]:
            mid = unicode(jsn.get('Id'))
            if semail.hasSentMessage(tp=SECTION, mid=(tid, mid)):
                continue

            # Generate new messages with this content
            emsgs.append(msgFromJson(cname, jsn, tid))

    return emsgs
예제 #53
0
def skoleFrontpage():
    """Fetch the front page and pass every section on to its handler.

    After logging in, an uncached copy of Forside.asp is fetched.  If the
    browser is then on Personoplysninger.asp, the personal data is
    confirmed first and the front page is fetched once more.  The tables
    inside the single main table are grouped by title, and each group is
    dispatched on that title.

    Raises AssertionError unless exactly one main table is found, and
    unless the cover picture section consists of exactly one table.
    """
    surllib.skoleLogin()

    config.log('Behandler forsiden')

    url = 'http://%s/Infoweb/Fi2/Forside.asp' % config.HOSTNAME
    data = surllib.skoleGetURL(url, asSoup=True, noCache=True)

    br = surllib.getBrowser()
    aurl = br.geturl()
    if u'Personoplysninger.asp' in aurl:
        # We are actually asked to confirm our personal data
        config.log(u'Bekræfter først vores personlige data')
        skoleConfirmPersonalData(data)

        data = surllib.skoleGetURL(url, asSoup=True, noCache=True)

    # find main table: skip nested tables, tables with a bgcolor attribute
    # and the short table that only holds the page title
    maint = []
    for mt in data.findAll('table'):
        if mt.findParents('table') or mt.has_key('bgcolor'):
            continue
        txt = mt.text
        if len(txt) < 30 and txt.lower().startswith(u'forældreintra for '):
            continue  # just the title
        maint.append(mt)
    assert (len(maint) == 1)  # assume exactly one main table

    maint = maint[0]

    # find interesting table tags (tables with text below the main table)
    itags = []
    for tag in maint:
        for ttag in tag.findAll('table'):
            if ttag.text:
                itags.append(ttag)

    # g: list of (title, [tables that follow that title])
    g = []
    for itag in itags:
        t = _getTitle(itag)
        if t is None:
            # not a title
            if not g:
                # In some cases (slideshows), the real title may be missing
                g.append((itags[0].text, []))
            g[-1][1].append(itag)
        else:
            # we have a new title
            g.append((t, []))

    for (t, xs) in g:
        ignore = len(xs) == 0 or t in TITLE_IGNORE
        config.log(u'Kategori [%s]%s' %
                   (t, ' (hoppes over)' if ignore else ''))
        if ignore:
            continue

        if t == TITLE_COVERPIC:
            assert (len(xs) == 1)  # exactly one cover picture
            skoleCoverPic(xs[0])
        elif t == TITLE_BBB:
            # BBB news are split
            # ignore first table which is a wrapper around all entries.
            # Plain loop rather than map(): only the side effects of the
            # calls matter, and a lazy map() would skip them altogether.
            for x in xs[1:]:
                skoleFrontBBB(x)
        elif t == TITLE_NEWS:
            # News from...
            skoleNewsFrom(xs)
        else:
            # send msg if something has changed
            for x in xs:
                skoleOtherStuff(t, x)