def listsCheckList(postData, listtype):
    '''Fetch the contact list page and e-mail its main table.

    postData is handed to surllib.skoleGetURL when requesting URL_MAIN.
    listtype selects the list variant; for 'V6' every link inside the
    table is replaced by its own contents before the table is sent.'''
    # NOTE(review): bs and tbl are published as module globals, exactly as
    # before -- presumably a debugging aid; confirm before removing.
    global bs, tbl

    # Accept a cached copy first; refetch when bs.cacheage exceeds 6.9
    # (unit is whatever surllib reports -- presumably days, TODO confirm)
    bs = surllib.skoleGetURL(URL_MAIN, True, False, True, postData)
    if bs.cacheage > 6.9:
        bs = surllib.skoleGetURL(URL_MAIN, True, True, True, postData)
    tbl = bs.findAll('table')[2]

    if listtype == 'V6':
        # Remove links to pictures of parents
        for link in tbl.findAll('a'):
            link.replaceWithChildren()

    # Title: prefer a heading in the first row (and drop that row),
    # then any heading in the table, then a fixed default
    firstRow = tbl.find('tr')
    rowHeading = firstRow.find('h2')
    if rowHeading:
        title = rowHeading.text
        firstRow.extract()
    else:
        tableHeading = tbl.find('h2')
        if tableHeading:
            title = tableHeading.text
        else:
            title = u'Kontaktoplysninger'

    mail = semail.Message('contactList', tbl)
    mail.setTitle(title)
    semail.maybeEmail(mail)
def msgFromJson(cname, jsn, threadId=''):
    '''Build an semail.Message from a decoded JSON message (Besked).

    cname is passed on to semail.Message. jsn is the decoded JSON dict; it
    is only read, never modified. threadId, when non-empty, is given to
    setMessageID in front of the message's own id.

    Returns the semail.Message, ready to be sent.'''
    # We have never seen this set to anything -- need to check
    # when this happens
    assert (not jsn['AdditionalLinkUrl'])

    subject = jsn['Subject']
    html = u'<div class="base">%s</div>\n' % jsn['BaseText']
    previous = jsn['PreviousMessagesText']
    if previous:
        # Earlier messages are quoted, so present this one as a reply.
        # The prefix goes on a local copy: writing it back into jsn would
        # add one more 'Re: ' each time the same dict is converted.
        subject = u'Re: ' + subject
        html += u'<div class="prev">%s</div>\n' % previous

    msg = semail.Message(cname, SECTION, html)
    msgId = str(jsn["Id"])
    if threadId:
        msg.setMessageID(threadId, msgId)
    else:
        msg.setMessageID(msgId)
    msg.setTitle(subject)
    msg.setDateTime(jsn['SentReceivedDateText'])
    msg.setRecipient(jsn['Recipients'])
    msg.setSender(jsn['SenderName'])
    # AttachmentsLinks may be None/empty instead of a list
    for att in (jsn['AttachmentsLinks'] or []):
        msg.addAttachment(att['HrefAttributeValue'], att['Text'])
    msg.setData({'unread': jsn.get('ShowUnreadIndication', False)})
    return msg
def wpFindWeekplans(bs):
    '''Send a message for every week plan linked from an overview page.

    bs is the parsed overview page. Each table row whose class attribute
    contains a class starting with 'linje' is expected to hold at least
    one link; the first link's target is fetched, trimmed with wpTrimPlan
    and handed to semail as a 'weekplans' message.'''
    for row in bs.findAll('tr'):
        if not row.has_key('class'):
            continue
        if not any(cls.startswith('linje') for cls in row['class'].split()):
            continue

        links = row.findAll('a')
        assert (len(links) >= 1)
        first = links[0]

        # The link text is the week date, which doubles as the title
        title = first.text

        # Build the absolute, quoted URL of the plan
        href = first['href'].encode('iso-8859-1')
        url = urlPrefix() + urllib.quote(href, safe=':/?=&%')

        plan = wpTrimPlan(surllib.skoleGetURL(url, True, True))

        mail = semail.Message('weekplans', plan)
        mail.setTitle(u'%s' % title)
        mail.updatePersonDate()
        mail.maybeSend()
def docFindDocuments(cname, rootTitle, bs, title):
    '''Search a folder for new documents.

    cname is passed to config.clog and semail.Message. rootTitle and the
    optional title (with '>' shown as '/') form the folder name used in the
    log line and in the e-mail. bs is the parsed folder page.

    One message is sent (via maybeSend) per 'sk-document' entry that has a
    title, a date and a link target; incomplete entries are skipped.'''
    folder = rootTitle
    if title:
        folder += u' / ' + title.replace(u'>', u'/')

    docs = bs.findAll('div', 'sk-document')
    config.clog(cname, u'%s: %d dokumenter fundet ' % (folder, len(docs)))

    for doc in docs:
        docTitle = doc.find('span', 'sk-documents-document-title').text.strip()
        docDate = doc.find('div', 'sk-documents-date-column').text.strip()

        # An entry without a link, or with a link lacking href, has no URL
        # and is skipped below instead of raising KeyError
        link = doc.find('a')
        url = ''
        if link is not None:
            url = link.get('href') or ''

        if not (docTitle and docDate and url):
            continue

        # Title of the e-mail: the document title without its extension
        # (rsplit leaves a title without any '.' untouched)
        sfn = docTitle.rsplit(u'.', 1)[0]

        # Create HTML snippet; folder/title are set as node strings rather
        # than interpolated into the markup
        html = u"<p>Nyt dokument: <span></span> / <b></b></p>\n"
        html += u"<!-- Sidst opdateret: %s -->" % docDate
        h = sbs4.beautify(html)
        h.span.string = folder
        h.b.string = docTitle

        msg = semail.Message(cname, SECTION, unicode(h))
        msg.setTitle(sfn)
        msg.setDateTime(docDate)
        msg.addAttachment(url, docTitle)
        msg.setMessageID(url.split('/')[-1])
        msg.maybeSend()
def contactCard(cname, bs):
    '''Check for new contact information for a single pupil.

    bs is the parsed contact page; it is rewritten in place into a
    table-based layout and then sent (via maybeSend) as one message whose
    title is the pupil's name and whose id is the last part of bs.url.

    Raises AssertionError if the name or the 'text-block' div is missing.'''
    # Find name
    name = bs.select('.sk-contact-person-name span.sk-labeledtext-value')
    assert (name)
    name = name[0].text.strip()

    # Change all the div+div+span's into a table
    table = bs.find('div', 'text-block')
    assert (table)  # If this fails, the design has changed drastically
    table.name = 'table'
    for span in table.select('div > span'):
        span.name = 'td'
        span.parent.name = 'tr'
        span['valign'] = 'top'
        if 'sk-labeledtext-value' in span['class']:
            span['style'] = 'font-weight:bold;'
    sbs4.unwrap(table, 'div')
    for h2 in table.select('h2'):
        h2.wrap(bs.new_tag('tr'))
        h2.name = 'td'
        h2['colspan'] = '2'
        h2['style'] = 'font-weight:bold; font-size:18px; padding-top:12px'

    # We do now have two cases depending on whether the image is available.
    # The photo block itself may be absent, and an <img> may lack src;
    # both count as "no image" instead of raising.
    photob = bs.find('div', 'photo-block')
    img = photob.find('img') if photob is not None else None
    src = img.get('src') if img is not None else None

    if src and 'placeholder' not in src:
        # Image is here: put photo and details side by side in a table row
        img['style'] = ('width:auto;'
                        'height:auto;'
                        'max-height:200px;'
                        'max-width:200px;')
        table.wrap(bs.new_tag('td'))
        photob.name = 'td'
        photob['valign'] = 'top'
        photob['style'] = 'padding-right: 15px;'
        photob.parent.name = 'tr'
        photob.parent.wrap(bs.new_tag('table'))
    elif photob is not None:
        # Either no image or image-placeholder is used
        photob.decompose()

    msg = semail.Message(cname, SECTION, unicode(bs))
    msg.setTitle(name)
    msg.setMessageID(bs.url.split('/')[-1])
    msg.maybeSend()
def skoleExamineNews(url, mid):
    '''Fetch one news page and hand it to semail as a 'dialogue' message.

    url is the page to fetch and mid the message id to record. The title
    is the text of the page's first h3; the body is the first table nested
    inside the page's fourth table.'''
    page = surllib.skoleGetURL(url, True)

    # title + main text
    heading = page.h3.text
    content = page.findAll('table')[3].table

    # create msg
    mail = semail.Message(u'dialogue', content)
    mail.setMessageID(mid)
    mail.setTitle(heading)
    mail.updatePersonDate()
    semail.maybeEmail(mail)
def sendPhotos(cname, title, mid, photos):
    '''Send photos if they have not already been sent.

    cname is passed to semail.Message, title is used in the e-mail
    heading and subject, mid is the message id shared by all e-mails of
    this album, and photos is a sequence of picture URLs. photos is never
    modified.

    The photos are sent (via maybeSend) in e-mails of PHOTOS_PER_EMAIL
    pictures each; every message stores its picture URLs with setData.'''
    # First determine if any of the photos were sent earlier
    previouslySent = set()
    for dn in semail.hasSentMessage(tp=SECTION, mid=mid):
        for fn in glob.glob(os.path.join(dn, '*.json')):
            try:
                # Close the file promptly instead of leaking the handle
                with open(fn) as fd:
                    jsn = json.load(fd)
            except ValueError:
                continue  # Simply ignore files with wrong JSON
            data = jsn.get('data')
            if data:
                previouslySent.update(data)

    pending = [url for url in photos if url not in previouslySent]
    if not pending:
        return
    if len(photos) - len(pending) < 5:
        # Fewer than 5 pictures have been sent earlier - send them all
        # again. Work on a copy so the caller's sequence stays intact.
        pending = list(photos)

    # Send the photos in e-mails of PHOTOS_PER_EMAIL pictures
    # (floor division keeps the count an int regardless of division mode)
    ecount = (len(pending) - 1) // PHOTOS_PER_EMAIL + 1
    for ei in range(ecount):
        start = ei * PHOTOS_PER_EMAIL
        pics = pending[start:start + PHOTOS_PER_EMAIL]

        # Create HTML snippet: one <img> per picture, src filled in below
        itag = u'<img style="max-width: 100%">'
        ebs = sbs4.beautify(u'<h2></h2><p>%s</p>' %
                            u'<br/>'.join([itag] * len(pics)))
        ebs.h2.string = title
        for i, img in enumerate(ebs.select('img')):
            img['src'] = pics[i]

        msg = semail.Message(cname, SECTION, unicode(ebs))
        if ecount > 1:
            msg.setTitle(u'Billeder: %s (%d/%d)' % (title, ei + 1, ecount))
        else:
            msg.setTitle(u'Billeder: %s' % title)
        msg.setMessageID(mid)
        msg.setData(pics)
        msg.maybeSend()
def skoleConfirmPersonalData(bs):
    '''E-mail the "confirm personal data" form and then confirm it.

    bs is the parsed page. If any of the expected texts is missing, each
    missing text is logged and nothing else happens. Otherwise the first
    table of the first form is e-mailed as a 'frontpage' message, and the
    form is submitted through the surllib browser when exactly one form
    named 'FrontPage_Form1' and exactly one submit button labelled
    TEXT_I_CONFIRM are found. If it cannot be submitted, this is logged.'''
    # check that we actually have the right form
    txts = [
        u'Bekræft personoplysninger',
        u'Navn og adresse:',
        u'E-mailadresse',
        u'Fastnettelefon:',
        u'Mobiltelefon',
    ]
    pageText = bs.text
    missing = [txt for txt in txts if txt not in pageText]
    for txt in missing:
        # Name the text that was not found (the placeholder used to be
        # logged verbatim because no argument was supplied)
        config.log(u'Hmmm.. "%s" ikke fundet på bekræftigelsessiden...' % txt)
    if missing:
        return

    # Find first form, and first table inside the form
    forms = bs.findAll('form')
    if forms:
        bs = forms[0]
    tables = bs.findAll('table')
    if tables:
        bs = tables[0]

    msg = semail.Message('frontpage', bs)
    msg.setTitle(u'Bekræft personoplysninger')
    semail.maybeEmail(msg)

    # And now, click the button to confirm the details
    br = surllib.getBrowser()
    fs = list(br.forms())
    if len(fs) == 1 and fs[0].name == 'FrontPage_Form1':
        # we have one form!
        br.select_form(fs[0].name)
        ss = bs.findAll('input', type='submit')
        if len(ss) == 1 and ss[0]['value'] == TEXT_I_CONFIRM:
            config.log(u'Bekræfter personlige data')
            br.submit()  # click submit!
            return

    # something went wrong above: the confirm button could not be used
    config.log(u'Hmmm.. "%s" ikke fundet på Bekræftigelsessiden...'
               % TEXT_I_CONFIRM)
def skoleFrontBBB(phtml):
    '''Hand one frontpage block to semail as a 'frontpage' message.

    phtml is the parsed block. Its text with tags stripped becomes the
    title. A birthday notice (contains 'har fødselsdag i dag' but not
    'Skrevet af') also gets its sender set to the text before ' har ';
    any other block gets updatePersonDate() called instead.'''
    mail = semail.Message('frontpage', phtml)

    # Strip tags, then collapse runs of whitespace
    plain = phtml.renderContents().decode('utf-8')
    plain = re.sub('[ \n\t]+', ' ', re.sub('<.*?>', ' ', plain))

    isBirthday = (u'har fødselsdag i dag' in plain
                  and 'Skrevet af' not in plain)
    if isBirthday:
        # somebody's birthday
        mail.setTitle(plain)
        mail.setSender(plain.split(u' har ')[0].strip())
    else:
        # Second pass: a tag that spanned several lines was not matched
        # above ('.' stops at newlines) but is, now that the whitespace
        # has been collapsed
        plain = re.sub('[ \n\t]+', ' ', re.sub('<.*?>', ' ', plain))
        mail.setTitle(' '.join(plain.split()), True)
        mail.updatePersonDate()

    semail.maybeEmail(mail)
def skoleConfirm(bs):
    '''Send e-mail wrt confirmation of your own contact details.

    bs is the parsed confirmation page. The form is cleaned up in place
    and sent (via maybeSend) as a 'con' message for the children named in
    the form, or for 'fskintra' if none is found.

    Do not actually click the confirm link. This is done in surllib.'''
    forms = bs.select('.sk-l-content-wrapper form')
    assert (forms)
    form = forms[0]
    assert (form['action'].endswith('Confirm'))

    # Find name(s) of children: list items of the form 'Elev: <name> ...'
    cnames = []
    for li in form.select('li'):
        st = li.text
        words = st.split() if st else []
        if len(words) < 2 or not words[0].lower().startswith('elev:'):
            continue
        child = unicode(words[1])
        if len(words) >= 3:
            # More than one name: keep the first and the last
            child += u' %s' % words[-1]
        cnames.append(child)
    if not cnames:
        cnames.append('fskintra')

    # Clean up HTML
    for li in form.select('li'):
        li.name = 'p'
        li['style'] = 'margin:0'
    sbs4.extract(form, 'script,div.ccl-formbuttonspanel')
    sbs4.unwrap(form, 'label,legend,fieldset')
    for tag in form.select('*'):
        del tag['class']
    for ol in form.select('ol'):
        ol.name = 'div'
        ol['style'] = 'margin:10px 0'

    # Turn the form itself into a plain div without attributes
    form.name = 'div'
    for att in list(form.attrs):
        del form[att]
    sbs4.appendTodayComment(form)

    mail = semail.Message(cnames[0], 'con', unicode(form))
    for extra in cnames[1:]:
        mail.addChild(extra)
    mail.setTitle(bs.h2.text)
    mail.maybeSend()
def skoleFrontBBB(phtml):
    '''Hand one frontpage block to semail as a 'frontpage' message.

    phtml is the parsed block; its <style> tags are removed in place. The
    text with tags stripped becomes the title. A birthday notice (contains
    'har fødselsdag' but not 'Skrevet af') also gets its sender set to the
    text before ' har ' and its date set to today; any other block gets
    updatePersonDate() called instead.'''
    mail = semail.Message('frontpage', phtml)

    # Style sheets would otherwise end up in the extracted text
    for styletag in phtml.findAll('style'):
        styletag.replaceWith('')

    # Strip tags, then collapse runs of whitespace
    plain = phtml.renderContents().decode('utf-8')
    plain = re.sub('[ \n\t]+', ' ', re.sub('<.*?>', ' ', plain))

    isBirthday = u'har fødselsdag' in plain and u'Skrevet af' not in plain
    if isBirthday:
        # somebody's birthday
        mail.setTitle(plain)
        mail.setSender(plain.split(u' har ')[0].strip())
        mail.setDate(time.strftime('%d-%m-%Y'))
    else:
        # Second pass: a tag that spanned several lines was not matched
        # above ('.' stops at newlines) but is, now that the whitespace
        # has been collapsed
        plain = re.sub('[ \n\t]+', ' ', re.sub('<.*?>', ' ', plain))
        mail.setTitle(' '.join(plain.split()), True)
        mail.updatePersonDate()

    semail.maybeEmail(mail)
def diaExamineMessage(url, mid):
    '''Fetch one dialogue message and send it on.

    url is the page of the message and mid its message id. Returns the
    result of msg.maybeSend() -- True iff an email was sent.

    Raises AssertionError when the page lacks the expected body row or
    the header table with at least three rows.'''
    page = surllib.skoleGetURL(url, True)

    # first, find main text
    bodyRow = page.find('tr', valign='top')
    assert (bodyRow)
    msg = semail.Message(u'dialogue', bodyRow.find('td'))
    msg.setMessageID(mid)

    # next, look at the header
    header = page.find('table', 'linje1')
    assert (header)  # there must be a header
    headerLines = header.findAll('tr')
    assert (len(headerLines) >= 3)  # there must be something inside the header

    for hl in headerLines:
        txt = hl.text
        # Empty lines and the expiry notice carry no information
        if not txt or txt.startswith(u'Denne besked slettes'):
            continue

        if hl.find('h4'):
            # title
            msg.setTitle(txt)
        elif txt.startswith((u'Besked fra', u'Oprettet af')):
            # Besked fra Frk Nielsen - modtaget den 26-09-2012 20:29:44
            msg.updatePersonDate(hl)
        elif txt.startswith(u'Sendt til '):
            # Sendt til ...
            msg.setRecipient(txt.split(u' ', 2)[-1])
        elif txt.startswith(u'Kopi til '):
            # Kopi til ...
            msg.setCC(txt.split(u' ', 2)[-1])
        else:
            config.log(u'Ukendt header i besked #%s: %s' % (mid, txt), -1)

    return msg.maybeSend()
def skoleWeekplans(cname):
    '''Weekly plans: look for new weekly plans for the child cname.

    Every link in the 'sk-weekly-plans-list-container' list is fetched
    with getWeekplan and sent (via maybeSend) with the last part of its
    URL as message id. If the list is missing and the page says the user
    is not authorised, a hint about disabling this section is logged.'''
    config.clog(cname, u'Kigger efter nye ugeplaner')
    listUrl = schildren.getChildURL(cname, 'item/weeklyplansandhomework/list/')
    bs = surllib.skoleGetURL(listUrl, True, noCache=True)

    ul = bs.find('ul', 'sk-weekly-plans-list-container')
    if not ul:
        if u'ikke autoriseret' in bs.text:
            config.clog(
                cname, u'Din skole bruger ikke ugeplaner. '
                u"Du bør bruge '--section ,-%s'" % SECTION)
        return

    for link in ul.find_all('a', href=True):
        planUrl = link['href']
        plan = getWeekplan(cname, planUrl)
        weekId = planUrl.split('/')[-1]  # e.g. 35-2018
        heading = plan.find('h3').text.strip()

        mail = semail.Message(cname, SECTION, unicode(plan))
        mail.setTitle(heading)
        mail.setMessageID(weekId)
        mail.maybeSend()
def parseFrontpage(cname, bs):
    '''Look for new frontpage news items.

    bs is the parsed frontpage. Returns a list of semail.Message objects:
    first one per birthday reminder in the sidebar, then one per
    'sk-news-item' div, with the news items in reverse page order.

    Raises AssertionError if the page holds no news items at all.'''
    msgs = []

    # Find potential interesting events today in the sidebar
    reminders = bs.find('ul', 'sk-reminders-container')
    if reminders:
        for li in reminders.findAll('li', recursive=False):
            for c in li.contents:
                lowered = unicode(c).strip().lower()
                if not lowered:
                    continue
                if u'der er aktiviteter i dag' in lowered \
                        and u'har fødselsdag' not in lowered:
                    continue  # ignore
                if u'har fødselsdag' not in lowered:
                    config.clog(cname, u'Hopper mini-besked %r over' %
                                c.text.strip(), 2)
                    continue

                # A birthday: decorate the reminder and date it today
                today = unicode(time.strftime(u'%d. %b. %Y'))
                c.append(u" \U0001F1E9\U0001F1F0")  # Unicode DK Flag
                sbs4.appendTodayComment(c)
                reminder = semail.Message(cname, SECTION, unicode(c))
                reminder.setTitle(c.text.strip())
                reminder.setDateTime(today)
                msgs.append(reminder)

    # Find interesting main front page items
    items = bs.findAll('div', 'sk-news-item')
    assert (len(items) > 0)  # 1+ msgs on the frontpage or something is wrong
    for div in reversed(items):
        msgs.append(parseFrontpageItem(cname, div))
    return msgs
def skoleOtherStuff(title, phtml):
    '''Hand some part of the frontpage (e.g., the weekly schedule) to
    semail as a 'frontpage' message with the given title.'''
    mail = semail.Message('frontpage', phtml)
    mail.setTitle(title)
    semail.maybeEmail(mail)
def skoleCoverPic(phtml):
    '''Hand the frontpage cover picture block to semail as a 'frontpage'
    message with a fixed title.'''
    mail = semail.Message('frontpage', phtml)
    mail.setTitle(u'Nyt forsidebillede')
    mail.updatePersonDate()
    semail.maybeEmail(mail)
def docFindDocuments(bs, foldername='Dokumentarkiv'):
    '''Send a message for every document on a page of documents.

    bs is the parsed page and foldername the display name of the folder
    it shows. Sub-folders are followed recursively, with their names
    appended to foldername.'''
    for row in bs.findAll('tr'):
        if not row.has_key('class'):
            continue
        if not any(cls.startswith('linje') for cls in row['class'].split()):
            continue

        links = row.findAll('a')
        assert(len(links) >= 2)
        iconLink, nameLink = links[0], links[1]

        # find file type (taken from the icon's file name)
        ext = iconLink.img['src'].split('/')[-1][2:-4].lower()

        # find name of file
        title = nameLink.text
        ltitle = foldername + ' / ' + title

        # find url
        url = iconLink['href']
        if 'visDokument' in url:
            url = URL_DOC + re.search(r'.*?(\d+)', iconLink['href']).group(1)
        else:
            assert('Dokliste' in url)
            url = urllib.quote(url.encode('iso-8859-1'), safe=':/?=&%')

        # find date
        dts = row.findAll('td', width='18%')
        assert(len(dts) == 1 and dts[0].text)  # exactly one date
        date = dts[0].text

        # now do stuff
        if 'Dokliste' not in url:
            # this is an actual document
            config.log(u'Kigger på dokumentet %s' % ltitle)

            # Create HTML snippet
            html = u"<p>Nyt dokument: <a href=''>%s</a></p>" % ltitle
            h = surllib.beautify(html)
            h.a['href'] = url
            h.a['usefilename'] = title + '.' + ext

            mail = semail.Message('documents', h)
            mail.setTitle(u'%s' % title)
            mail.setDate(date)
            mail.maybeSend()
            continue

        # this is a subfolder
        # first look at (potentially cached version)
        suburl = URL_PREFIX + url
        subbs = surllib.skoleGetURL(suburl, True)

        # The date is listed as day-month-year
        parts = [int(p) for p in date.split('-')]
        subdate = datetime.date(*parts[::-1])

        cacheAge = (datetime.date.today() - subbs.cachedate).days
        if subbs.cachedate <= subdate or cacheAge > 2:
            # cached version is too old - refetch
            subbs = surllib.skoleGetURL(suburl, True, True)
            config.log(u'Kigger på folderen %s' % title)
        else:
            config.log(u'Kigger på folderen %s (fra cache)' % title)
        docFindDocuments(subbs, ltitle)
def parseFrontpageItem(cname, div):
    '''Parse a single frontpage news item.

    cname is the child the item belongs to and div the item's
    'sk-news-item' element. Returns an semail.Message holding the trimmed
    body (followed by the comments, when there are any), with title,
    message id, sender, recipients, date and attachments filled in.

    Raises AssertionError if the comment counter has an unknown format.'''
    # Do we have any comments?
    comments = div.find('div', 'sk-news-item-comments')
    cdiv = u''
    if comments:
        # Comments are enabled
        txt = comments.text.strip().lower()
        if u'tilføj' not in txt:
            m = re.match(u'.*vis (\\d+) kommentar.*', txt)
            assert (m)
            nc = int(m.group(1))
            if nc > 0:
                suff = '/news/pins/%s/comments' % div['data-feed-item-id']
                url = schildren.getChildURL(cname, suff)
                bs = surllib.skoleGetURL(url, asSoup=True,
                                         postData={'_': str(nc)})
                container = bs.find('div', 'sk-comments-container')
                # Without this check a missing container would put the
                # literal text 'None' into the e-mail
                if container is not None:
                    cdiv = u'<br>' + unicode(container)

    author = div.find('div', 'sk-news-item-author')
    body = div.find('div', 'sk-news-item-content')

    # trim the body a bit
    body = sbs4.copy(body)  # make a copy as we look for attachments later
    for e in body.select('.sk-news-item-footer, .sk-news-item-comments'):
        e.extract()
    for e in body.select('.h-fnt-bd'):
        e['style'] = 'font-weight: bold'
    for e in body.select('div'):
        # remove empty divs
        contents = u''.join(map(unicode, e.children)).strip()
        if not contents:
            e.extract()

    # Trim extra white space - sometimes unnecessary linebreaks are introduced
    sbs4.trimSoup(body)

    msg = semail.Message(cname, SECTION, unicode(body) + cdiv)

    # The body is only used for finding the title from here on: flatten
    # inline markup and take the first line of text
    for e in body.select('span, strong, b, i'):
        e.unwrap()
    sbs4.condenseSoup(body)
    title = body.get_text(u'\n', strip=True).strip().split(u'\n')[0]
    title = title.replace(u'\xa0', u' ').strip()
    title = u' '.join(title.rstrip(u' .').split())
    msg.setTitle(title, True)
    msg.setMessageID(div['data-feed-item-id'])
    msg.setSender(author.span.text)

    # Find list of recipients: strip everything else from the author div
    author.span.extract()  # Remove author
    # NOTE(review): author.span below is evaluated after the author span
    # has been extracted, so it is the NEXT span in the div (if any).
    # Kept as is -- confirm against real pages that this is intended.
    for tag in [
            author.span,
            author.find('span', 'sk-news-item-for'),  # Remove 'til'
            author.find('span', 'sk-news-item-and'),  # Remove ' og '
            author.find('a', 'sk-news-show-more-link')]:
        if tag:
            tag.extract()
    recp = re.sub(u'\\s*(,| og )\\s*', ',', author.text.strip())
    recp = recp.split(u',')
    msg.setRecipient(recp)

    msg.setDateTime(div.find('div', 'sk-news-item-timestamp').text)

    # Do we have any attachments?
    divA = div.find('div', 'sk-attachments-list')
    if divA:
        for att in divA.findAll('a'):
            msg.addAttachment(att['href'], att.text.strip())
    return msg