def markMessageAsRead(cname, mid, isRead=True):
    '''Set the read/unread state of message `mid` for the child `cname`.

    Posts to the child's '/messages/UpdateMessagesReadState' URL.

    cname  -- child name; used to build the URL and passed to the logger
    mid    -- message id as a (byte or unicode) string
    isRead -- truthy to mark the message as read, falsy to mark it unread
    '''
    # isinstance also accepts subclasses of str/unicode, unlike an exact
    # type() comparison.
    assert isinstance(mid, basestring)
    if isRead:
        flag = 'true'
        logmsg = u'Markerer besked #%s som læst'
    else:
        # Log the state that is actually being set ("ulæst" = unread)
        flag = 'false'
        logmsg = u'Markerer besked #%s som ulæst'
    url = schildren.getChildURL(cname, '/messages/UpdateMessagesReadState')
    data = {'selectionState[MessageIds][]': mid, 'isRead': flag}
    config.clog(cname, logmsg % mid, 3)
    surllib.skoleGetURL(url, noCache=True, postData=data)
def markMessageAsRead(cname, mid, isRead=True):
    '''Set the read/unread state of message `mid` for the child `cname`.

    Posts to the child's '/messages/UpdateMessagesReadState' URL.

    cname  -- child name; used to build the URL and passed to the logger
    mid    -- message id as a (byte or unicode) string
    isRead -- truthy to mark the message as read, falsy to mark it unread
    '''
    # isinstance also accepts subclasses of str/unicode, unlike an exact
    # type() comparison.
    assert isinstance(mid, basestring)
    if isRead:
        flag = 'true'
        logmsg = u'Markerer besked #%s som læst'
    else:
        # Log the state that is actually being set ("ulæst" = unread)
        flag = 'false'
        logmsg = u'Markerer besked #%s som ulæst'
    url = schildren.getChildURL(cname, '/messages/UpdateMessagesReadState')
    data = {'selectionState[MessageIds][]': mid, 'isRead': flag}
    config.clog(cname, logmsg % mid, 3)
    surllib.skoleGetURL(url, noCache=True, postData=data)
def listsCheckList(postData, listtype):
    '''Fetch one contact list and pass it on for e-mailing.

    postData -- form fields posted to URL_MAIN to select the list
    listtype -- list identifier; for 'V6' all links in the table are
                replaced by their contents

    The fetched page and the extracted table are stored in the module
    globals `bs` and `tbl`.
    '''
    global bs, tbl

    # First request; the second one differs only in its third argument and
    # is issued when the reported cache age exceeds 6.9
    # (presumably a forced refetch - confirm against surllib).
    bs = surllib.skoleGetURL(URL_MAIN, True, False, True, postData)
    if bs.cacheage > 6.9:
        bs = surllib.skoleGetURL(URL_MAIN, True, True, True, postData)
    tbl = bs.findAll('table')[2]

    if listtype == 'V6':
        # Remove links to pictures of parents
        for link in tbl.findAll('a'):
            link.replaceWithChildren()

    # Title: heading in the first row (row is then dropped), else any
    # heading in the table, else a fixed default.
    firstRow = tbl.find('tr')
    heading = firstRow.find('h2')
    if heading:
        title = heading.text
        firstRow.extract()
    else:
        heading = tbl.find('h2')
        if heading:
            title = heading.text
        else:
            title = u'Kontaktoplysninger'

    msg = semail.Message('contactList', tbl)
    msg.setTitle(title)
    semail.maybeEmail(msg)
def getMsgsForChild(cname):
    '''Collect the new messages for one child.

    The browser state 'dialogue' selects the parser: 'conversations' uses
    parseMessages on the conversations page, 'inbox' uses parseTrayMessages
    on both the inbox and the outbox. Any other value is logged and yields
    an empty list.
    '''
    viewType = surllib.getBrowser().getState('dialogue')

    if viewType == 'conversations':
        # New more "gmail" like message view
        pageURL = schildren.getChildURL(cname, '/messages/conversations')
        config.clog(cname, u'Kigger efter nye beskeder på %s' % pageURL)
        soup = surllib.skoleGetURL(pageURL, asSoup=True, noCache=True)
        return parseMessages(cname, soup)

    if viewType != 'inbox':
        config.clog(cname, u'Beskede-indbakke-type %r ikke understøttet'
                    % viewType, 0)
        return []

    # Old message view
    found = []
    for tray in ('inbox', 'outbox'):
        pageURL = schildren.getChildURL(cname, '/messages/' + tray)
        config.clog(cname, u'Kigger efter nye beskeder på %s' % pageURL)
        soup = surllib.skoleGetURL(pageURL, asSoup=True, noCache=True)
        found += parseTrayMessages(cname, soup)
    return found
def getMsgsForChild(cname):
    '''Collect the new messages for one child.

    The browser state 'dialogue' selects the parser: 'conversations' uses
    parseMessages on the conversations page, 'inbox' uses parseTrayMessages
    on both the inbox and the outbox. Any other value is logged and yields
    an empty list.
    '''
    viewType = surllib.getBrowser().getState('dialogue')

    if viewType == 'conversations':
        # New more "gmail" like message view
        pageURL = schildren.getChildURL(cname, '/messages/conversations')
        config.clog(cname, u'Kigger efter nye beskeder på %s' % pageURL)
        soup = surllib.skoleGetURL(pageURL, asSoup=True, noCache=True)
        return parseMessages(cname, soup)

    if viewType != 'inbox':
        config.clog(cname, u'Beskede-indbakke-type %r ikke understøttet'
                    % viewType, 0)
        return []

    # Old message view
    found = []
    for tray in ('inbox', 'outbox'):
        pageURL = schildren.getChildURL(cname, '/messages/' + tray)
        config.clog(cname, u'Kigger efter nye beskeder på %s' % pageURL)
        soup = surllib.skoleGetURL(pageURL, asSoup=True, noCache=True)
        found += parseTrayMessages(cname, soup)
    return found
def skoleSelectChild(name):
    '''Make `name` the selected child.

    `name` must be a key of the module-level `_children` mapping. When it
    differs from config.CHILDNAME, the child's URL is fetched (uncached)
    and config.CHILDNAME is updated.
    '''
    assert name in _children

    if name == config.CHILDNAME:
        config.log(u'[%s] er allerede valgt som barn' % name)
        return

    config.log(u'Vælger [%s]' % name)
    surllib.skoleGetURL(URL_PREFIX + _children[name], False, noCache=True)
    config.CHILDNAME = name
def skoleSelectChild(name):
    '''Make `name` the selected child.

    `name` must be a key of the module-level `_children` mapping. When it
    differs from config.CHILDNAME, the child's URL is fetched (uncached)
    and config.CHILDNAME is updated.
    '''
    assert name in _children

    if name == config.CHILDNAME:
        config.log(u'[%s] er allerede valgt som barn' % name)
        return

    config.log(u'Vælger [%s]' % name)
    surllib.skoleGetURL(urlPrefix() + _children[name], False, noCache=True)
    config.CHILDNAME = name
def getMsgsForChild(cname):
    '''Fetch the child's '/Index' page (uncached) and return whatever
    parseFrontpage extracts from it.'''
    frontURL = schildren.getChildURL(cname, '/Index')
    config.clog(cname, u'Behandler forsiden %s' % frontURL)
    return parseFrontpage(
        cname, surllib.skoleGetURL(frontURL, asSoup=True, noCache=True))
def wpFindWeekplans(bs):
    '''Send a 'weekplans' message for every week plan row found in `bs`.

    A row counts when one of its CSS classes starts with 'linje'; its first
    link supplies both the title and the URL of the plan page.
    '''
    for row in bs.findAll('tr'):
        if not row.has_key('class'):
            continue
        if not any(cls.startswith('linje') for cls in row['class'].split()):
            continue

        anchors = row.findAll('a')
        assert len(anchors) >= 1
        first = anchors[0]

        # find week date
        title = first.text

        # find url (quoted from its iso-8859-1 encoded form)
        href = first['href'].encode('iso-8859-1')
        planURL = URL_PREFIX + urllib.quote(href, safe=':/?=&%')

        plan = wpTrimPlan(surllib.skoleGetURL(planURL, True, True))
        msg = semail.Message('weekplans', plan)
        msg.setTitle(u'%s' % title)
        msg.updatePersonDate()
        msg.maybeSend()
def skoleGetChildren():
    '''Return the sorted names of the children listed on the page at url().

    Clears the module-level `_children` mapping, resets the login and logs
    in again before fetching. Names in NAMES_IGNORE are logged and skipped.
    Returns [] when the page fetch yields a falsy result.
    '''
    global _children

    # reset list of children
    _children = None
    # reset login, then ensure that we are logged in
    surllib.resetSkoleLogin()
    surllib.skoleLogin()

    config.log(u'Henter liste af børn')
    if not _children:
        soup = surllib.skoleGetURL(url(), asSoup=True, noCache=True)
        if not soup:
            return []
        _children = {}
        for anchor in soup.findAll('a'):
            target = anchor['href']
            childName = anchor.span.text
            if childName in NAMES_IGNORE:
                config.log(u'Ignorerer [%s]' % childName)
            else:
                _children[childName] = target
    return sorted(_children.keys())
def parseTrayMessages(cname, bs):
    '''Return the not-yet-sent messages of one message tray (old view).

    For every '.sk-message-list-item' element the message id is taken from
    the '/message/<digits>' part of its link. Items already recorded by
    semail.hasSentMessage are skipped; the rest are fetched and parsed
    with parseTrayMessage.
    '''
    result = []
    for item in bs.select('.sk-message-list-item'):
        link = item.find('a')['href']

        ids = re.findall('(?<=/message/)[0-9]+', link)
        assert len(ids) == 1 and ids[0]
        mid = ids[0]

        # Strip a trailing " (...)" part from the sender name, if present
        sender = item.find(
            'li', 'sk-message-senderrecipient-name').text.strip()
        match = re.match(r'^([^(]*) \(.*\)$', sender)
        if match:
            sender = match.group(1)

        title = item.find('div', 'sk-message-title').text.strip()

        if semail.hasSentMessage(tp=SECTION, mid=mid):
            continue

        config.clog(cname, u'Henter ny besked: %s - %s' % (sender, title), 2)
        page = surllib.skoleGetURL(link, True)
        result.append(parseTrayMessage(cname, page, mid, sender))
    return result
def wpFindWeekplans(bs):
    '''Send a 'weekplans' message for every week plan row found in `bs`.

    A row counts when one of its CSS classes starts with 'linje'; its first
    link supplies both the title and the URL of the plan page.
    '''
    for row in bs.findAll('tr'):
        if not row.has_key('class'):
            continue
        if not any(cls.startswith('linje') for cls in row['class'].split()):
            continue

        anchors = row.findAll('a')
        assert len(anchors) >= 1
        first = anchors[0]

        # find week date
        title = first.text

        # find url (quoted from its iso-8859-1 encoded form)
        href = first['href'].encode('iso-8859-1')
        planURL = urlPrefix() + urllib.quote(href, safe=':/?=&%')

        plan = wpTrimPlan(surllib.skoleGetURL(planURL, True, True))
        msg = semail.Message('weekplans', plan)
        msg.setTitle(u'%s' % title)
        msg.updatePersonDate()
        msg.maybeSend()
def skolePhotos(cname):
    'Billeder'
    # NOTE(review): the short Danish docstring ("Photos") reads like a
    # display label and may be consumed via __doc__ elsewhere; kept
    # verbatim - confirm.
    # Fetch the child's photo archive page and scan it for photos.
    archiveURL = schildren.getChildURL(cname, '/photos/archives')
    soup = surllib.skoleGetURL(archiveURL, True, MAX_CACHE_AGE)
    config.clog(cname, u'Kigger efter billeder')
    findPhotos(cname, soup)
def parseTrayMessages(cname, bs):
    '''Return the not-yet-sent messages of one message tray (old view).

    For every '.sk-message-list-item' element the message id is taken from
    the '/message/<digits>' part of its link. Items already recorded by
    semail.hasSentMessage are skipped; the rest are fetched and parsed
    with parseTrayMessage.
    '''
    result = []
    for item in bs.select('.sk-message-list-item'):
        link = item.find('a')['href']

        ids = re.findall('(?<=/message/)[0-9]+', link)
        assert len(ids) == 1 and ids[0]
        mid = ids[0]

        # Strip a trailing " (...)" part from the sender name, if present
        sender = item.find(
            'li', 'sk-message-senderrecipient-name').text.strip()
        match = re.match(r'^([^(]*) \(.*\)$', sender)
        if match:
            sender = match.group(1)

        title = item.find('div', 'sk-message-title').text.strip()

        if semail.hasSentMessage(tp=SECTION, mid=mid):
            continue

        config.clog(cname, u'Henter ny besked: %s - %s' % (sender, title), 2)
        page = surllib.skoleGetURL(link, True)
        result.append(parseTrayMessage(cname, page, mid, sender))
    return result
def skoleSignup(cname):
    'Tilmelding til samtaler/arrangementer'
    # NOTE(review): the Danish docstring ("Sign-up for conversations/
    # events") reads like a display label and may be consumed via __doc__
    # elsewhere; kept verbatim - confirm.
    config.clog(cname, u'Kigger efter nye samtaler/arrangementer')
    # One sign-up page per kind; each is scanned with findEvents
    for kind in ('conversation', 'event'):
        page = surllib.skoleGetURL(
            schildren.getChildURL(cname, '/signup/' + kind),
            True, MAX_CACHE_AGE)
        findEvents(cname, page)
def skoleContacts(cname):
    'Kontaktinformation'
    # NOTE(review): the Danish docstring ("Contact information") reads like
    # a display label and may be consumed via __doc__ elsewhere; kept
    # verbatim - confirm.
    config.clog(cname, u'Kigger efter ny kontaktinformation')
    cardsURL = schildren.getChildURL(cname, '/contacts/students/cards')
    overview = surllib.skoleGetURL(cardsURL, True, MAX_CACHE_AGE)

    students = overview.select('#sk-toolbar-contact-dropdown option')
    if students:
        # Every dropdown option's value is the URL of one contact card
        for option in students:
            card = surllib.skoleGetURL(
                option['value'], True, overview.cacheage + .01)
            contactCard(cname, card)
    else:
        config.clog(cname, u'Kan ikke finde nogen elever?')
def skoleWeekplans():
    '''Log in, fetch the page at urlMain() and process its week plans.

    The fetched page is kept in the module global `bs`.
    '''
    global bs

    surllib.skoleLogin()
    config.log(u'Kigger efter nye ugeplaner')

    # read the initial page
    startPage = surllib.skoleGetURL(urlMain(), True, True)
    bs = startPage
    wpFindWeekplans(startPage)
def skoleDocuments():
    '''Fetch the page at URL_MAIN and process its documents.

    The fetched page is kept in the module global `bs`.
    '''
    global bs

    config.log(u'Kigger efter nye dokumenter')

    # read the initial page
    startPage = surllib.skoleGetURL(URL_MAIN, True, True)
    bs = startPage
    docFindDocuments(startPage)
def skoleWeekplans():
    '''Fetch the page at URL_MAIN and process its week plans.

    The fetched page is kept in the module global `bs`.
    '''
    global bs

    config.log(u'Kigger efter nye ugeplaner')

    # read the initial page
    startPage = surllib.skoleGetURL(URL_MAIN, True, True)
    bs = startPage
    wpFindWeekplans(startPage)
def skoleDocuments():
    '''Fetch the page at URL_MAIN and process its documents.

    The fetched page is kept in the module global `bs`.
    '''
    global bs

    config.log(u'Kigger efter nye dokumenter')

    # read the initial page
    startPage = surllib.skoleGetURL(URL_MAIN, True, True)
    bs = startPage
    docFindDocuments(startPage)
def checkForUpdates():
    '''Decide whether a full update should be run.

    Returns (full, state): `full` is a bool and `state` is the tuple
    (now, updateURL), where updateURL is the href of the first link in the
    notification list (or None when there is none).

    A full update is requested when, checked in this order:
      * the stored 'lastUpdateURL' differs from updateURL,
      * the stored 'lastUpdateTime' is missing, unparsable or in the future,
      * the stored time is not after the most recent 05:00,
      * config.options.fullupdate is set.
    '''
    now = datetime.datetime.now()
    soup = surllib.skoleGetURL(
        '/notifications/v1?useNewerThan=False&pageSize=10', True, True)

    # Find lastUpdateURL
    anchors = soup.select('.sk-notifications-list li a')
    if not anchors or not anchors[0].has_attr('href'):
        updateURL = None
        config.log(u'Kunne ikke finde sidst opdaterede side', 2)
    else:
        updateURL = anchors[0]['href']
        config.log(u'Sidste opdatering var til %s' % updateURL, 2)
    state = (now, updateURL)

    def runFull(message):
        # Log the reason and produce the "do a full update" result
        config.log(message, 1)
        return (True, state)

    # Should we do a full check?
    browser = surllib.getBrowser()
    if browser.getState('lastUpdateURL') != updateURL:
        # New top update
        return runFull(u'Kører fuld opdatering: Forventer nyt opslag/besked')

    try:
        stamp = browser.getState('lastUpdateTime')
        if stamp:
            lastRun = datetime.datetime.strptime(stamp, DT_FORMAT)
        else:
            lastRun = None
    except ValueError:
        lastRun = None
    if not lastRun or now < lastRun:
        # lastUpdateTime is somehow wrong
        return runFull(u'Kører fuld opdatering: Mangler tidsstempel fra '
                       u'sidste kørsel')

    # Do a daily full check the first time we are accessed after 05:00
    cutoff = now.replace(hour=5, minute=0, second=0, microsecond=0)
    if now.hour < 5:
        # Between midnight and 05:00, go back one day
        cutoff = cutoff - datetime.timedelta(1)
    if lastRun <= cutoff:
        # Last update was before the cutoff
        return runFull(u'Kører fuld opdatering: Første kørsel i dag')

    # Did we NOT run this with the --quick parameter
    if config.options.fullupdate:
        return runFull(u'Kører fuld opdatering selvom der ikke forventes '
                       u'nyt. Du bør bruge --quick')

    # No need to run a full update
    config.log(u'Kører ikke fuld opdatering - der forventes intet nyt', 1)
    return (False, state)
def skoleExamineNews(url, mid):
    '''Fetch the news page at `url` and pass it on as a 'dialogue' message.

    The title is the text of the page's first <h3>; the body is the first
    table nested inside the page's fourth table. `mid` becomes the
    message id.
    '''
    page = surllib.skoleGetURL(url, True)

    # title + main text
    heading = page.h3.text
    content = page.findAll('table')[3].table

    # create msg
    news = semail.Message(u'dialogue', content)
    news.setMessageID(mid)
    news.setTitle(heading)
    news.updatePersonDate()
    semail.maybeEmail(news)
def skoleDocuments(cname):
    'Dokumenter'
    # NOTE(review): the short Danish docstring ("Documents") reads like a
    # display label and may be consumed via __doc__ elsewhere; kept
    # verbatim - confirm.
    roots = [('Klassens dokumenter', 'class')]
    for rootTitle, folder in roots:
        config.clog(cname, u'%s: Kigger efter dokumenter' % rootTitle)
        rootURL = schildren.getChildURL(cname, '/documents/' + folder)
        rootPage = surllib.skoleGetURL(rootURL, True, MAX_CACHE_AGE)
        docFindDocuments(cname, rootTitle, rootPage, '')

        # look for sub folders: a JSON list stored in the 'value' attribute
        # of the element with id 'FoldersJson'
        holder = rootPage.find(id='FoldersJson')
        if not holder or not holder.has_attr('value'):
            continue
        for sub in json.loads(holder['value']):
            if sub[u'Name'].startswith('$'):
                continue
            subTitle = sub[u'Title']
            subPage = surllib.skoleGetURL(
                sub[u'Url'], True, MAX_CACHE_AGE, None, True)
            docFindDocuments(cname, rootTitle, subPage, subTitle)
def findPhotos(cname, bs):
    '''Visit every photo folder offered in the page's folder filter.

    Each '#sk-photos-toolbar-filter option' with a 'value' attribute is
    treated as a folder URL. URLs outside the child's URL prefix are logged
    and skipped; the others are fetched and passed to findPhotosInFolder.
    '''
    childPrefix = schildren.getChildURLPrefix(cname)
    for option in bs.select('#sk-photos-toolbar-filter option'):
        if not option.has_attr('value'):
            continue
        folderURL = surllib.absurl(option['value'])
        folderName = option.text.strip()
        if folderURL.startswith(childPrefix):
            page = surllib.skoleGetURL(folderURL, True, MAX_CACHE_AGE)
            findPhotosInFolder(cname, folderURL, page)
        else:
            config.clog(cname, u'Billeder: %s: ukendt URL %r'
                        % (folderName, option['value']))
def findPhotos(cname, bs):
    '''Visit every photo folder offered in the page's folder filter.

    Each '#sk-photos-toolbar-filter option' with a 'value' attribute is
    treated as a folder URL. URLs outside the child's URL prefix are logged
    and skipped; the others are fetched and passed to findPhotosInFolder.
    '''
    childPrefix = schildren.getChildURLPrefix(cname)
    for option in bs.select('#sk-photos-toolbar-filter option'):
        if not option.has_attr('value'):
            continue
        folderURL = surllib.absurl(option['value'])
        folderName = option.text.strip()
        if folderURL.startswith(childPrefix):
            page = surllib.skoleGetURL(folderURL, True, MAX_CACHE_AGE)
            findPhotosInFolder(cname, folderURL, page)
        else:
            config.clog(cname, u'Billeder: %s: ukendt URL %r'
                        % (folderName, option['value']))
def skoleGetChildren():
    '''Return the sorted names of the children listed on the page at URL.

    The name -> href mapping is kept in the module-level `_children`; the
    page is only fetched while that mapping is empty or unset.
    '''
    global _children

    config.log(u'Henter liste af børn')
    if _children:
        return sorted(_children.keys())

    soup = surllib.skoleGetURL(URL, asSoup=True, noCache=True)
    _children = {}
    for anchor in soup.findAll('a'):
        target = anchor['href']
        _children[anchor.span.text] = target
    return sorted(_children.keys())
def diaExamineMessage(url, mid):
    '''Fetch the message page at `url` and send it as a 'dialogue' message.

    `mid` becomes the message id. Returns the result of msg.maybeSend().
    '''
    page = surllib.skoleGetURL(url, True)

    # first, find main text: first cell of the first top-aligned row
    bodyRow = page.find('tr', valign='top')
    assert bodyRow
    msg = semail.Message(u'dialogue', bodyRow.find('td'))
    msg.setMessageID(mid)

    # next, look at the header
    header = page.find('table', 'linje1')
    assert header  # there must be a header
    rows = header.findAll('tr')
    assert len(rows) >= 3  # there must be something inside the header

    for row in rows:
        txt = row.text
        # Empty rows and the "will be deleted" notice carry no information
        if not txt or txt.startswith(u'Denne besked slettes'):
            continue
        if row.find('h4'):
            # title
            msg.setTitle(txt)
        elif txt.startswith((u'Besked fra', u'Oprettet af')):
            # e.g. Besked fra Frk Nielsen - modtaget den 26-09-2012 20:29:44
            msg.updatePersonDate(row)
        elif txt.startswith(u'Sendt til '):
            # recipient: everything after the first two words
            msg.setRecipient(txt.split(u' ', 2)[-1])
        elif txt.startswith(u'Kopi til '):
            # cc: everything after the first two words
            msg.setCC(txt.split(u' ', 2)[-1])
        else:
            config.log(u'Ukendt header i besked #%s: %s' % (mid, txt), -1)

    return msg.maybeSend()
def diaExamineMessage(url, mid):
    '''Fetch the message page at `url` and send it as a 'dialogue' message.

    `mid` becomes the message id. Returns the result of msg.maybeSend().
    '''
    page = surllib.skoleGetURL(url, True)

    # first, find main text: first cell of the first top-aligned row
    bodyRow = page.find('tr', valign='top')
    assert bodyRow
    msg = semail.Message(u'dialogue', bodyRow.find('td'))
    msg.setMessageID(mid)

    # next, look at the header
    header = page.find('table', 'linje1')
    assert header  # there must be a header
    rows = header.findAll('tr')
    assert len(rows) >= 3  # there must be something inside the header

    for row in rows:
        txt = row.text
        # Empty rows and the "will be deleted" notice carry no information
        if not txt or txt.startswith(u'Denne besked slettes'):
            continue
        if row.find('h4'):
            # title
            msg.setTitle(txt)
        elif txt.startswith((u'Besked fra', u'Oprettet af')):
            # e.g. Besked fra Frk Nielsen - modtaget den 26-09-2012 20:29:44
            msg.updatePersonDate(row)
        elif txt.startswith(u'Sendt til '):
            # recipient: everything after the first two words
            msg.setRecipient(txt.split(u' ', 2)[-1])
        elif txt.startswith(u'Kopi til '):
            # cc: everything after the first two words
            msg.setCC(txt.split(u' ', 2)[-1])
        else:
            config.log(u'Ukendt header i besked #%s: %s' % (mid, txt), -1)

    return msg.maybeSend()
def skoleContactLists():
    '''Request every contact list whose id is in LISTS_TO_SEND.

    The form on the page at URL_MAIN is inspected to build the POST data:
    two <input> fields plus the first option of the 'fKlasse' and
    'fSortering' selects. The select named 'R1' supplies the available
    lists. Any missing or unexpected form element is logged and aborts the
    run. The page and the 'R1' select are kept in the module globals `bs`
    and `lists`.
    '''
    global bs, lists
    config.log(u'Kigger efter nye adresser')

    # read the initial page
    bs = surllib.skoleGetURL(URL_MAIN, True, False, True)

    # Setup post request
    postData = {}
    for criteria in ({'id': 'fSkjult'}, {'type': 'submit'}):
        field = bs.find('input', **criteria)
        if not field:
            config.log(u'pgContactLists: INPUT med %s ej fundet'
                       % repr(criteria))
            return
        postData[field['name']] = field['value']

    lists = None
    for sel in bs.findAll('select'):
        firstValue = sel.option['value']
        selName = sel['name']
        if selName in ('fKlasse', 'fSortering'):
            postData[selName] = firstValue
        elif selName == 'R1':
            lists = sel
        else:
            # Unknown SELECT found
            config.log(u'pgContactLists: Ukendt SELECT: %s' % selName)
            return

    if not lists:
        config.log(u'pgContactLists: SELECT med mulige lister ej fundet')
        return

    for option in lists.findAll('option'):
        if option['value'] not in LISTS_TO_SEND:
            continue
        postData[lists['name']] = option['value']
        listsCheckList(postData, option['value'])
def skoleWeekplans(cname):
    'Ugeplaner'
    # NOTE(review): the short Danish docstring ("Week plans") reads like a
    # display label and may be consumed via __doc__ elsewhere; kept
    # verbatim - confirm.
    config.clog(cname, u'Kigger efter nye ugeplaner')
    listURL = schildren.getChildURL(cname, 'item/weeklyplansandhomework/list/')
    overview = surllib.skoleGetURL(listURL, True, noCache=True)

    container = overview.find('ul', 'sk-weekly-plans-list-container')
    if not container:
        # No plan list; tell the user when the page says "not authorised"
        if u'ikke autoriseret' in overview.text:
            config.clog(cname, u'Din skole bruger ikke ugeplaner. '
                               u"Du bør bruge '--section ,-%s'" % SECTION)
        return

    for link in container.find_all('a', href=True):
        planURL = link['href']
        plan = getWeekplan(cname, planURL)
        weekID = planURL.split('/')[-1]  # e.g. 35-2018
        heading = plan.find('h3').text.strip()

        msg = semail.Message(cname, SECTION, unicode(plan))
        msg.setTitle(heading)
        msg.setMessageID(weekID)
        msg.maybeSend()
def skoleWeekplans(cname):
    'Ugeplaner'
    # NOTE(review): the short Danish docstring ("Week plans") reads like a
    # display label and may be consumed via __doc__ elsewhere; kept
    # verbatim - confirm.
    config.clog(cname, u'Kigger efter nye ugeplaner')
    listURL = schildren.getChildURL(cname, 'item/weeklyplansandhomework/list/')
    overview = surllib.skoleGetURL(listURL, True, noCache=True)

    container = overview.find('ul', 'sk-weekly-plans-list-container')
    if not container:
        # No plan list; tell the user when the page says "not authorised"
        if u'ikke autoriseret' in overview.text:
            config.clog(cname, u'Din skole bruger ikke ugeplaner. '
                               u"Du bør bruge '--section ,-%s'" % SECTION)
        return

    for link in container.find_all('a', href=True):
        planURL = link['href']
        plan = getWeekplan(cname, planURL)
        weekID = planURL.split('/')[-1]  # e.g. 35-2018
        heading = plan.find('h3').text.strip()

        msg = semail.Message(cname, SECTION, unicode(plan))
        msg.setTitle(heading)
        msg.setMessageID(weekID)
        msg.maybeSend()
def skoleGetChildren():
    '''Return the sorted names of the children listed on the page at URL.

    The name -> href mapping is kept in the module-level `_children`; the
    page is only fetched while that mapping is empty or unset. The entry
    named SKOLEBESTYRELSE_NAME is logged and left out.
    '''
    global _children

    config.log(u'Henter liste af børn')
    if _children:
        return sorted(_children.keys())

    soup = surllib.skoleGetURL(URL, asSoup=True, noCache=True)
    _children = {}
    for anchor in soup.findAll('a'):
        target = anchor['href']
        childName = anchor.span.text
        if childName == SKOLEBESTYRELSE_NAME:
            config.log(u'Ignorerer [%s]' % childName)
        else:
            _children[childName] = target
    return sorted(_children.keys())
def skoleGetChildren():
    """Return the sorted names of the children listed on the page at URL.

    The name -> href mapping is kept in the module-level `_children`; the
    page is only fetched while that mapping is empty or unset. Names found
    in NAMES_IGNORE are logged and left out.
    """
    global _children

    config.log(u"Henter liste af børn")
    if _children:
        return sorted(_children.keys())

    soup = surllib.skoleGetURL(URL, asSoup=True, noCache=True)
    _children = {}
    for anchor in soup.findAll("a"):
        target = anchor["href"]
        childName = anchor.span.text
        if childName in NAMES_IGNORE:
            config.log(u"Ignorerer [%s]" % childName)
        else:
            _children[childName] = target
    return sorted(_children.keys())
def asEmail(self):
    '''Return this message as an e-mail (MIME) object.

    The object is built once and cached in self._email; later calls return
    the cached object. Building it fetches inline images and on-site links
    through surllib.skoleGetURL.
    '''
    if self._email:
        return self._email
    self.prepareMessage()
    hostname = socket.getfqdn()  # used below in a few places
    # Work on a copy so that the HTML decoration below does not alter self.mp
    mpp = self.mp.copy()

    def wrapOrZap(key, title, tag=''):
        # Replace mpp[key] by an HTML snippet "<title>: <value>" (the value
        # optionally wrapped in `tag`), or by '' when there is no value.
        if title:
            title += u': '
        val = mpp.get(key, None)
        if val:
            if tag:
                # `tag` may carry attributes; only its first word is used
                # for the closing tag
                val = u'<%s>%s</%s>' % (tag, val, tag.split()[0])
            mpp[key] = (u"<span style='font-size: 15px'>"
                        u"%s%s</span><br>\n ") % (title, val)
        else:
            mpp[key] = ''
    wrapOrZap('sender', '', 'b style="font-size: 17px"')
    wrapOrZap('recipient', 'Til')
    wrapOrZap('cc', 'Kopi til')
    # create initial HTML version
    html = u'''<!DOCTYPE html> <html lang="da"> <head> <meta charset="utf-8"> <title>%(title)s</title> </head> <body style='font-family: Helvetica, sans-serif; font-size: 14px;'> <h1>%(title)s</h1> <div class='hd' style='padding:5px;background-color:#eee;margin-bottom:15px;'> %(sender)s%(recipient)s%(cc)s<span>%(date_string)s</span> </div> <div class='text'> %(html)s </div> </body> </html>''' % mpp
    html = sbs4.beautify(html)
    # First look for inline images, if any
    # iimgs: mapping from URL to (cid, binary string contents)
    iimgs = {}
    for imgtag in html.findAll('img'):
        if not imgtag.has_attr('src'):
            continue  # ignore
        url = imgtag['src']
        if url.lower().startswith('data:'):
            # ignore 'inline' images
            continue
        elif not url:
            # ignore empty URLs
            continue
        if url not in iimgs:
            try:
                data = surllib.skoleGetURL(url, False)
            except urllib2.URLError:
                # could not fetch URL for some reason - ignore
                continue
            # is this actually an image?
            if not imghdr.what('', data):
                continue  # ignore
            cid = 'image%d-%f@%s' % (len(iimgs) + 1, time.time(), hostname)
            iimgs[url] = (cid, data)
        # Point the tag at the attached copy of the image
        cid, _ = iimgs[url]
        imgtag['src'] = 'cid:' + cid
    # Next, handle attachments
    # attachments: email attachments ready for attachment :)
    attachments = []
    for atag in html.findAll('a'):
        try:
            url = atag['href']
        except KeyError:
            atag.replaceWithChildren()  # kill the "broken" link
            continue
        url = atag['href']
        if url.startswith('/') or config.options.hostname in url:
            # onsite
            data = None
            try:
                data = surllib.skoleGetURL(url, False)
            except urllib2.URLError:
                # unable to fetch URL
                config.log(
                    u'%s: Kan ikke hente flg. URL: %s' %
                    (self.mp['title'] if self.mp['title'] else self, url))
            if data:
                eatt = generateMIMEAttachment(url, data, None)
                attachments.append(eatt)
                atag.replaceWithChildren()  # kill the actual link
    # Attach actual attachments (if any)
    # NOTE(review): the key is spelled 'attatchments'; it must match the
    # spelling used where self.mp is filled in - confirm.
    for (url, text) in self.mp['attatchments']:
        data = surllib.skoleGetURL(url, False)
        eatt = generateMIMEAttachment(url, data, text)
        attachments.append(eatt)
    # Now, put the pieces together
    html = html.prettify()
    msgHtml = MIMEText(html, 'html', 'utf-8')
    if not iimgs and not attachments:
        # pure HTML version
        msg = msgHtml
    else:
        # Inline images but no attachments
        #   multipart/related
        #     text/html with html text
        #     image/xxx with inline images
        # OR
        # email with inline images + attachment
        #   multipart/mixed
        #     text/html with the html version
        #     image/gif with image
        #     application/xxx with word document
        if attachments:
            msg = MIMEMultipart('mixed', type='text/html')
        else:
            msg = MIMEMultipart('related', type='text/html')
        # Sub-parts drop their own MIME-Version header before being attached
        del msgHtml['MIME-Version']
        msg.attach(msgHtml)
        # Attach images if any
        for (url, (cid, data)) in iimgs.items():
            m = MIMEImage(data)
            m.add_header('Content-ID', '<%s>' % cid)
            fn = niceFilename(url)
            m.add_header('Content-Disposition', 'inline',
                         filename=headerEncodeField(fn))
            del m['MIME-Version']
            msg.attach(m)
        # Attach attachments if any
        for attachment in attachments:
            del attachment['MIME-Version']
            msg.attach(attachment)
    # Now, for the general headers
    dt = email.utils.formatdate(time.mktime(self.mp['date_ts']), True)
    msg['Received'] = ('from %s ([127.0.0.1] helo=%s) '
                       'by %s with smtp (fskintra) for %s; %s') % (
        hostname, hostname, hostname, config.options.email, dt)
    msg['Date'] = dt
    title = self.mp['title']
    if self.mp['children']:
        # Prefix the subject with the names of the children concerned
        title = u'[%s] %s' % (', '.join(self.mp['children']), title)
    msg['Subject'] = headerEncodeField(title, 60)
    if 'sender' in self.mp and self.mp['sender']:
        sender = u'Skoleintra - %s' % self.mp['sender']
    else:
        sender = u'Skoleintra'
    sender = '%s <%s>' % (headerEncodeField(sender),
                          config.options.senderemail)
    msg['From'] = sender
    msg['To'] = config.options.email
    # Other tags just for ourselves
    keys = 'mid,md5'.split(',')
    for key in keys:
        if self.mp.get(key, None):
            kkey = 'X-skoleintra-%s' % key
            msg[kkey] = headerEncodeField(self.mp[key], 60)
    self._email = msg
    return msg
def parseFrontpageItem(cname, div): '''Parse a single frontpage news item''' # Do we have any comments? comments = div.find('div', 'sk-news-item-comments') cdiv = u'' if comments: global c # Comments are enabled txt = comments.text.strip() if u'tilføj' not in txt.lower(): m = re.match(ur'.*vis (\d+) kommentar.*', txt.lower()) assert (m) nc = int(m.group(1)) if nc > 0: suff = '/news/pins/%s/comments' % div['data-feed-item-id'] url = schildren.getChildURL(cname, suff) bs = surllib.skoleGetURL(url, asSoup=True, postData={'_': str(nc)}) cdiv = unicode(bs.find('div', 'sk-comments-container')) cdiv = u'<br>' + cdiv author = div.find('div', 'sk-news-item-author') body = div.find('div', 'sk-news-item-content') # trim the body a bit body = sbs4.copy(body) # make a copy as we look for attachments later for e in body.select('.sk-news-item-footer, .sk-news-item-comments'): e.extract() for e in body.select('.h-fnt-bd'): e['style'] = 'font-weight: bold' for e in body.select('div'): # remove empty divs contents = u''.join(map(unicode, e.children)).strip() if not contents: e.extract() # Trim extra white space - sometimes unecessary linebreaks are introduced sbs4.trimSoup(body) msg = semail.Message(cname, SECTION, unicode(body) + cdiv) for e in body.select('span, strong, b, i'): e.unwrap() sbs4.condenseSoup(body) title = body.get_text(u'\n', strip=True).strip().split(u'\n')[0] title = title.replace(u'\xa0', u' ').strip() title = u' '.join(title.rstrip(u' .').split()) msg.setTitle(title, True) msg.setMessageID(div['data-feed-item-id']) msg.setSender(author.span.text) # Find list of recipients author.span.extract() # Remove author for tag in [ author.span, # Remove author author.find('span', 'sk-news-item-for'), # Remove 'til' author.find('span', 'sk-news-item-and'), # Remove ' og ' author.find('a', 'sk-news-show-more-link') ]: if tag: tag.extract() recp = re.sub(ur'\s*(,| og )\s*', ',', author.text.strip()) recp = recp.split(u',') msg.setRecipient(recp) 
msg.setDateTime(div.find('div', 'sk-news-item-timestamp').text) # Do we have any attachments? divA = div.find('div', 'sk-attachments-list') if divA: for att in (divA.findAll('a') or []): url = att['href'] text = att.text.strip() msg.addAttachment(url, text) return msg
def _findConversations(bs):
    '''Return the non-empty 'Conversations' value embedded as JSON in an
    attribute of the conversations page, or None when there is none.'''
    main = bs.find('div', 'sk-l-content-wrapper')
    if main is None:
        return None
    for d in main.findAll('div'):
        for a in d.attrs:
            # Look for a very long attribute whose name mentions "message"
            if 'message' not in a.lower() or len(d[a]) < 100:
                continue
            try:
                jsn = json.loads(d[a])
            except ValueError:
                continue
            if isinstance(jsn, dict) and jsn.get('Conversations'):
                # Stop at the first hit so that a later attribute cannot
                # overwrite it (e.g. with None)
                return jsn['Conversations']
    return None


def parseMessages(cname, bs):
    '''Look for new messages in each conversation.

    cname -- child name; used for URLs, logging and the created messages
    bs    -- soup of the conversations page

    Returns a list with one msgFromJson(...) result for every message that
    semail.hasSentMessage does not know yet. Returns [] (after logging)
    when no conversation data is found on the page.
    '''
    conversations = _findConversations(bs)
    if not conversations:
        config.clog(cname, 'Ingen beskeder fundet?!?', -1)
        return []

    emsgs = []
    for i, c in enumerate(conversations):
        tid = c.get('ThreadId')
        if not tid:
            # ThreadId can be empty if this is a msg to all students
            tid = ''
        # unicode(None) would be the truthy string u'None', which would
        # defeat the missing-id check below
        lmid = c.get('LatestMessageId')
        lmid = unicode(lmid) if lmid is not None else u''
        if not lmid:
            config.clog(cname, u'Noget galt i tråd #%d %r %r'
                        % (i, tid, lmid), -1)
            continue
        if semail.hasSentMessage(tp=SECTION, mid=(tid, lmid)):
            continue

        # This last message has not been seen - load the entire conversation
        if tid:
            suffix = (
                '/messages/conversations/loadmessagesforselectedconversation'
                + '?threadId=' + tid
                + '&takeFromRootMessageId=' + lmid
                + '&takeToMessageId=0'
                + '&searchRequest=')
        else:
            suffix = (
                '/messages/conversations/getmessageforthreadlessconversation'
                + '?messageId=' + lmid)
        curl = schildren.getChildURL(cname, suffix)
        data = surllib.skoleGetURL(curl, asSoup=False, noCache=True,
                                   addTimeSuffix=True)
        try:
            jsn = json.loads(data)
        except ValueError:
            # unicode literal, like the other non-ASCII log messages
            config.clog(cname,
                        u'Kan ikke indlæse besked-listen i tråd %d %r %r'
                        % (i, tid, lmid), -1)
            continue

        # A thread yields a list of messages; a thread-less conversation
        # yields a single message object
        msgs = jsn if tid else [jsn]
        assert isinstance(msgs, list)
        for jsn in msgs[::-1]:
            mid = unicode(jsn.get('Id'))
            if semail.hasSentMessage(tp=SECTION, mid=(tid, mid)):
                continue
            # Generate new messages with this content
            emsgs.append(msgFromJson(cname, jsn, tid))
    return emsgs
def docFindDocuments(bs, foldername='Dokumentarkiv'):
    '''Process one page of the document archive.

    bs         -- soup with the content of a page of documents
    foldername -- display path of the folder; prefixed to every title

    Looks at this page and (recursively) all subfolders, and sends a
    'documents' message for each document via maybeSend().
    '''
    trs = bs.findAll('tr')
    for line in trs:
        # Only rows with a CSS class starting with 'linje' are entries
        if not line.has_key('class'):
            continue
        if not [c for c in line['class'].split() if c.startswith('linje')]:
            continue
        links = line.findAll('a')
        assert(len(links) >= 2)
        # find file type: derived from the file name of the icon image
        ext = links[0].img['src'].split('/')[-1][2:-4].lower()
        # find name of file
        title = links[1].text
        ltitle = foldername + ' / ' + title
        # find url
        url = links[0]['href']
        if 'visDokument' in url:
            # Document link: rebuild the URL from the first number in it
            url = URL_DOC + re.search('.*?(\d+)', links[0]['href']).group(1)
        else:
            assert('Dokliste' in url)
            url = urllib.quote(url.encode('iso-8859-1'), safe=':/?=&%')
        # find date
        dts = line.findAll('td', width='18%')
        assert(len(dts) == 1 and dts[0].text)  # exactly one date
        date = dts[0].text
        # now do stuff
        if 'Dokliste' in url:
            # this is a subfolder
            # first look at (potentially cached version)
            suburl = URL_PREFIX + url
            subbs = surllib.skoleGetURL(suburl, True)
            # `date` is split on '-' and its parts reversed, i.e. it is
            # read as day-month-year
            subdate = datetime.date(*reversed(map(int, date.split('-'))))
            if subbs.cachedate <= subdate or \
                    (datetime.date.today() - subbs.cachedate).days > 2:
                # cached version is too old - refetch
                subbs = surllib.skoleGetURL(suburl, True, True)
                config.log(u'Kigger på folderen %s' % title)
            else:
                config.log(u'Kigger på folderen %s (fra cache)' % title)
            docFindDocuments(subbs, ltitle)
        else:
            # this is an actual document
            config.log(u'Kigger på dokumentet %s' % ltitle)
            # Create HTML snippet
            # NOTE(review): ltitle is inserted into the markup unescaped.
            html = u"<p>Nyt dokument: <a href=''>%s</a></p>" % ltitle
            h = surllib.beautify(html)
            h.a['href'] = url
            h.a['usefilename'] = title + '.' + ext
            msg = semail.Message('documents', h)
            msg.setTitle(u'%s' % title)
            msg.setDate(date)
            msg.maybeSend()
def asEmail(self):
    '''Return this message as an e-mail (MIME) object.

    The object is built once and cached in self._email; later calls return
    the cached object. Building it fetches inline images and on-site links
    through surllib.skoleGetURL.
    '''
    if self._email:
        return self._email
    self.prepareMessage()
    hostname = socket.getfqdn()  # used below in a few places
    # Work on a copy so that the HTML decoration below does not alter self.mp
    mpp = self.mp.copy()

    def wrapOrZap(key, title, tag=''):
        # Replace mpp[key] by an HTML snippet "<title>: <value>" (the value
        # optionally wrapped in `tag`), or by '' when there is no value.
        if title:
            title += u': '
        val = mpp.get(key, None)
        if val:
            if tag:
                # `tag` may carry attributes; only its first word is used
                # for the closing tag
                val = u'<%s>%s</%s>' % (tag, val, tag.split()[0])
            mpp[key] = (u"<span style='font-size: 15px'>"
                        u"%s%s</span><br>\n ") % (title, val)
        else:
            mpp[key] = ''
    wrapOrZap('sender', '', 'b style="font-size: 17px"')
    wrapOrZap('recipient', 'Til')
    wrapOrZap('cc', 'Kopi til')
    # create initial HTML version
    html = u'''<!DOCTYPE html> <html lang="da"> <head> <meta charset="utf-8"> <title>%(title)s</title> </head> <body style='font-family: Helvetica, sans-serif; font-size: 14px;'> <h1>%(title)s</h1> <div class='hd' style='padding:5px;background-color:#eee;margin-bottom:15px;'> %(sender)s%(recipient)s%(cc)s<span>%(date_string)s</span> </div> <div class='text'> %(html)s </div> </body> </html>''' % mpp
    html = sbs4.beautify(html)
    # First look for inline images, if any
    # iimgs: mapping from URL to (cid, binary string contents)
    iimgs = {}
    for imgtag in html.findAll('img'):
        if not imgtag.has_attr('src'):
            continue  # ignore
        url = imgtag['src']
        if url.lower().startswith('data:'):
            # ignore 'inline' images
            continue
        elif not url:
            # ignore empty URLs
            continue
        if url not in iimgs:
            try:
                data = surllib.skoleGetURL(url, False)
            except urllib2.URLError:
                # could not fetch URL for some reason - ignore
                continue
            # is this actually an image?
            if not imghdr.what('', data):
                continue  # ignore
            cid = 'image%d-%f@%s' % (len(iimgs) + 1, time.time(), hostname)
            iimgs[url] = (cid, data)
        # Point the tag at the attached copy of the image
        cid, _ = iimgs[url]
        imgtag['src'] = 'cid:' + cid
    # Next, handle attachments
    # attachments: email attachments ready for attachment :)
    attachments = []
    for atag in html.findAll('a'):
        try:
            url = atag['href']
        except KeyError:
            atag.replaceWithChildren()  # kill the "broken" link
            continue
        url = atag['href']
        if url.startswith('/') or config.options.hostname in url:
            # onsite
            data = None
            try:
                data = surllib.skoleGetURL(url, False)
            except urllib2.URLError:
                # unable to fetch URL
                config.log(u'%s: Kan ikke hente flg. URL: %s' %
                           (self.mp['title'] if self.mp['title'] else self,
                            url))
            if data:
                eatt = generateMIMEAttachment(url, data, None)
                attachments.append(eatt)
                atag.replaceWithChildren()  # kill the actual link
    # Attach actual attachments (if any)
    # NOTE(review): the key is spelled 'attatchments'; it must match the
    # spelling used where self.mp is filled in - confirm.
    for (url, text) in self.mp['attatchments']:
        data = surllib.skoleGetURL(url, False)
        eatt = generateMIMEAttachment(url, data, text)
        attachments.append(eatt)
    # Now, put the pieces together
    html = html.prettify()
    msgHtml = MIMEText(html, 'html', 'utf-8')
    if not iimgs and not attachments:
        # pure HTML version
        msg = msgHtml
    else:
        # Inline images but no attachments
        #   multipart/related
        #     text/html with html text
        #     image/xxx with inline images
        # OR
        # email with inline images + attachment
        #   multipart/mixed
        #     text/html with the html version
        #     image/gif with image
        #     application/xxx with word document
        if attachments:
            msg = MIMEMultipart('mixed', type='text/html')
        else:
            msg = MIMEMultipart('related', type='text/html')
        # Sub-parts drop their own MIME-Version header before being attached
        del msgHtml['MIME-Version']
        msg.attach(msgHtml)
        # Attach images if any
        for (url, (cid, data)) in iimgs.items():
            m = MIMEImage(data)
            m.add_header('Content-ID', '<%s>' % cid)
            fn = niceFilename(url)
            m.add_header('Content-Disposition', 'inline',
                         filename=headerEncodeField(fn))
            del m['MIME-Version']
            msg.attach(m)
        # Attach attachments if any
        for attachment in attachments:
            del attachment['MIME-Version']
            msg.attach(attachment)
    # Now, for the general headers
    dt = email.utils.formatdate(time.mktime(self.mp['date_ts']), True)
    msg['Received'] = ('from %s ([127.0.0.1] helo=%s) '
                       'by %s with smtp (fskintra) for %s; %s'
                       ) % (hostname, hostname, hostname,
                            config.options.email, dt)
    msg['Date'] = dt
    title = self.mp['title']
    if self.mp['children']:
        # Prefix the subject with the names of the children concerned
        title = u'[%s] %s' % (', '.join(self.mp['children']), title)
    msg['Subject'] = headerEncodeField(title, 60)
    if 'sender' in self.mp and self.mp['sender']:
        sender = u'Skoleintra - %s' % self.mp['sender']
    else:
        sender = u'Skoleintra'
    sender = '%s <%s>' % (headerEncodeField(sender),
                          config.options.senderemail)
    msg['From'] = sender
    msg['To'] = config.options.email
    # Other tags just for ourselves
    keys = 'mid,md5'.split(',')
    for key in keys:
        if self.mp.get(key, None):
            kkey = 'X-skoleintra-%s' % key
            msg[kkey] = headerEncodeField(self.mp[key], 60)
    self._email = msg
    return msg
def asEmail(self):
    '''Start building the e-mail version of this message.

    Returns the cached e-mail when one has been built before. Otherwise
    the message fields are rendered into an HTML page and every <img>
    whose URL can be fetched (and is a real image) is rewritten to a
    cid: reference, with the image data collected for inlining.

    NOTE(review): this copy of the method stops after the inline-image
    pass - it never assembles, caches or returns a message, so it
    implicitly returns None. It looks truncated; confirm against the
    complete asEmail implementation.
    '''
    if self._email:
        return self._email
    self.prepareMessage()

    hostname = socket.getfqdn()  # used below in a few places

    mpp = self.mp.copy()

    def wrapOrZap(key, title):
        # Wrap a non-empty field in a <p>, otherwise blank it out
        val = self.mp.get(key, None)
        if val:
            mpp[key] = "<p class='%s' style='margin: 0;'>%s: %s</p>\n"
            mpp[key] %= (key, title, val)
        else:
            mpp[key] = ''

    wrapOrZap('sender', 'Fra')
    wrapOrZap('recipient', 'Til')
    if mpp.get('time', None):
        mpp['ttime'] = u' ' + mpp['time']
    else:
        mpp['ttime'] = u''

    # create initial HTML version
    html = u'''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" /> <title>%(title)s</title> </head> <body style='font-family: Verdana,Arial,Helvetica'> <h1>%(title)s</h1> <div class='meta' style='background-color: #eaeaea; color: #000; padding: 5px; margin: 0 0 10px 0;'> %(sender)s%(recipient)s <p class='date' style='margin: 0;'>Dato: %(date)s%(ttime)s</p> </div> <div class='text'> %(nicehtml)s </div> </body> </html> '''
    html %= mpp
    html = BeautifulSoup.ICantBelieveItsBeautifulSoup(html)

    # first look for inline images (if any)
    # iimgs: mapping from URL to (cid, binary string contents)
    iimgs = {}
    for imgtag in html.findAll('img'):
        if not imgtag.has_key('src'):
            continue  # an <img> without src would raise KeyError below
        url = imgtag['src']
        if url.lower().startswith('data:'):
            # ignore 'inline' images
            continue
        elif not url:
            # ignore empty URLs
            continue
        if url not in iimgs:
            try:
                data = surllib.skoleGetURL(url, False)
            except urllib2.URLError:
                # could not fetch URL for some reason - ignore
                continue
            # is this actually an image?
            if not imghdr.what('', data):
                continue  # ignore
            cid = 'image%d-%f@%s' % (len(iimgs) + 1, time.time(), hostname)
            iimgs[url] = (cid, data)
        cid, _ = iimgs[url]

        imgtag['src'] = 'cid:' + cid
def getWeekplan(cname, url):
    '''Fetch the week plan page at url, bypassing the cache, and format it.

    cname is accepted but not used by this function.
    '''
    return formatWeekplan(surllib.skoleGetURL(url, True, noCache=True))
class Message:
    '''A single piece of scraped content that can be turned into an e-mail.

    All fields live in the dictionary self.mp; self._email caches the
    MIME message built by asEmail().
    '''

    def __init__(self, type, phtml):
        '''Create a message of the given type from the parsed HTML phtml.'''
        self.mp = {}
        self.mp['type'] = type  # frontpage or ...
        self.mp['phtml'] = phtml
        # use self.data in general
        self.mp['data'] = str(phtml).decode('utf-8')
        self.mp['childname'] = config.CHILDNAME
        # not set by constructor
        self.mp['title'] = None
        self.mp['date'] = None
        self.mp['time'] = None
        self.mp['sender'] = None
        self.mp['recipient'] = None
        self.mp['mid'] = None
        self._email = None

    def __repr__(self):
        txt = u'<semail.Message'
        keys = 'type,mid,date,time,title,sender'.split(',')
        for key in keys:
            if key in self.mp and self.mp[key]:
                txt += u' %s=%s' % (key, repr(self.mp[key]))
        txt += u'>'
        return txt

    def setTitle(self, title, shorten=False):
        '''Set the title; with shorten, cut long titles after ~40 chars.

        The cut is made at the end of the word that crosses position 40
        and '...' is appended.
        '''
        if shorten and len(title) > 40:
            title = title[:40] + title[40:].split(' ', 2)[0] + '...'
        self.mp['title'] = title

    def setDate(self, date):
        '''Set the date; a "date time" string also sets the time.

        Raises ValueError if the string has more than two
        whitespace-separated parts.
        '''
        date = date.strip()
        if ' ' in date:
            # also time
            date, timePart = date.split()
            self.setTime(timePart)
        self.mp['date'] = date

    def setTime(self, time):
        self.mp['time'] = time

    def setSender(self, sender):
        self.mp['sender'] = sender

    def setRecipient(self, recipient):
        self.mp['recipient'] = recipient

    def setMessageID(self, mid):
        self.mp['mid'] = mid

    def updatePersonDate(self, phtml=None):
        '''Extract sender and date from the HTML and store them.

        Uses phtml when given, otherwise the message's own data. Logs
        'No sender found' when none of the known patterns match.
        '''
        if phtml:
            d = phtml.renderContents().decode('utf-8')
        else:
            d = self.mp['data']
        assert isinstance(d, unicode)  # must be unicode

        # e.g. front page pics - the last match wins
        m = re.findall(u'>(?:Lagt ind|Skrevet) af ([^<]*?) den ([-0-9]*?)<', d)
        if m:
            m = m[-1]
            self.setSender(m[0])
            self.setDate(m[1])
            return

        m = re.findall(
            u'(?s)<small>Besked fra([^<]*?) - (?:modtaget|sendt) '
            u'den ([^<]*?)</small>', d)
        if not m:
            m = re.findall(
                u'(?s)<small>Oprettet af([^<]*?) '
                u'den ([^<]*?)</small>', d)
        if m:
            m = m[0]
            self.setSender(m[0].strip())
            self.setDate(m[1].strip())
            return

        # neither Sender nor date/time found
        config.log('No sender found', 2)
        return

    def prepareMessage(self):
        '''Fill in derived fields (md5, date, nicehtml) that are missing.'''
        # add missing fields, if any
        if not self.mp.get('md5', None):
            # the checksum is computed before the date is defaulted below,
            # so a missing date is not part of it
            keys = 'type,date,title,data'.split(',')
            txt = u' '.join([self.mp[x] for x in keys if self.mp.get(x, None)])
            self.mp['md5'] = unicode(md5.md5(txt.encode('utf-8')).hexdigest())
        if not self.mp.get('date', None):
            # use today as the date
            self.setDate(time.strftime('%d-%m-%Y'))

        # create nice version of the raw html
        if 'nicehtml' not in self.mp:
            self.mp['nicehtml'] = nicehtml(self.mp['data'])

    def getMessageID(self):
        '''Return the explicit message id, or the md5 checksum if none.'''
        if self.mp.get('mid', None):
            return self.mp['mid']
        else:
            self.prepareMessage()
            return self.mp['md5']

    def getLongMessageID(self):
        '''Return "<date reversed>--<message id>", used as directory name.'''
        # A message with an explicit mid may not have a date yet, which
        # would fail on None.split below; prepareMessage defaults it.
        self.prepareMessage()
        dt = '-'.join(reversed(self.mp['date'].split('-')))
        return '%s--%s' % (dt, self.getMessageID())

    def hasBeenSent(self):
        '''Tests whether this email has previously been sent.

        Returns the (possibly empty) list of matching stored directories.
        '''
        mid = self.getMessageID()
        old = glob.glob(os.path.join(config.MSG_DN, '*--%s' % mid))
        return old

    def store(self):
        '''Store the message on disk; return False if already stored.

        The files are written to a temporary directory that is renamed
        into place once both files are complete.
        '''
        mid = self.getMessageID()
        dn = os.path.join(config.MSG_DN, self.getLongMessageID())
        if os.path.isdir(dn):
            # already stored - ignore!
            return False
        tdn = dn + '.tmp'
        if os.path.isdir(tdn):
            config.log('Removing previous temporary directory %s' %
                       repr(tdn), 2)
            shutil.rmtree(tdn)  # Remove stuff
        os.mkdir(tdn)

        # 'with' closes the files even if building or writing fails
        with open(os.path.join(tdn, mid + '.eml'), 'wb') as fd:
            fd.write(str(self.asEmail()))

        mpp = [(unicode(k), unicode(v)) for (k, v) in self.mp.items()]
        with codecs.open(os.path.join(tdn, mid + '.txt'), 'wb', 'utf-8') as fd:
            fd.write(repr(mpp))

        os.rename(tdn, dn)
        return True

    def asEmail(self):
        '''Return this message as a MIME e-mail (built once, then cached).

        Images referenced from the HTML are inlined, on-site links are
        fetched and added as attachments, and the usual headers are set.
        '''
        if self._email:
            return self._email
        self.prepareMessage()

        hostname = socket.getfqdn()  # used below in a few places

        mpp = self.mp.copy()

        def wrapOrZap(key, title):
            # Wrap a non-empty field in a <p>, otherwise blank it out
            val = self.mp.get(key, None)
            if val:
                mpp[key] = "<p class='%s' style='margin: 0;'>%s: %s</p>\n"
                mpp[key] %= (key, title, val)
            else:
                mpp[key] = ''

        wrapOrZap('sender', 'Fra')
        wrapOrZap('recipient', 'Til')
        if mpp.get('time', None):
            mpp['ttime'] = u' ' + mpp['time']
        else:
            mpp['ttime'] = u''

        # create initial HTML version
        html = u'''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" /> <title>%(title)s</title> </head> <body style='font-family: Verdana,Arial,Helvetica'> <h1>%(title)s</h1> <div class='meta' style='background-color: #eaeaea; color: #000; padding: 5px; margin: 0 0 10px 0;'> %(sender)s%(recipient)s <p class='date' style='margin: 0;'>Dato: %(date)s%(ttime)s</p> </div> <div class='text'> %(nicehtml)s </div> </body> </html> '''
        html %= mpp
        html = BeautifulSoup.ICantBelieveItsBeautifulSoup(html)

        # first look for inline images (if any)
        # iimgs: mapping from URL to (cid, binary string contents)
        iimgs = {}
        for imgtag in html.findAll('img'):
            if not imgtag.has_key('src'):
                continue  # an <img> without src would raise KeyError below
            url = imgtag['src']
            if url.lower().startswith('data:'):
                # ignore 'inline' images
                continue
            elif not url:
                # ignore empty URLs
                continue
            if url not in iimgs:
                try:
                    data = surllib.skoleGetURL(url, False)
                except urllib2.URLError:
                    # could not fetch URL for some reason - ignore
                    continue
                # is this actually an image?
                if not imghdr.what('', data):
                    continue  # ignore
                cid = 'image%d-%f@%s' % (len(iimgs) + 1, time.time(), hostname)
                iimgs[url] = (cid, data)
            cid, _ = iimgs[url]

            imgtag['src'] = 'cid:' + cid

        # next, handle attachments
        # attachments: email attachments ready for attachment :)
        attachments = []
        for atag in html.findAll('a'):
            try:
                url = atag['href']
            except KeyError:
                atag.replaceWithChildren()  # kill the "broken" link
                continue
            if 'Tilmelding/Oversigt.asp' in url:
                atag.replaceWithChildren()  # kill link
                continue
            if url.startswith('/') or config.HOSTNAME in url:  # onsite!
                data = None
                try:
                    data = surllib.skoleGetURL(url, False)
                except Exception:
                    # unable to fetch URL - best effort, but do not swallow
                    # KeyboardInterrupt/SystemExit like a bare except would
                    config.log(
                        u'%s: Kan ikke hente flg. URL: %s' %
                        (self.mp['title'] if self.mp['title'] else self, url))
                if data:
                    if atag.has_key('usefilename'):
                        usefilename = atag['usefilename']
                    else:
                        usefilename = None
                    eatt = generateMIMEAttachment(url, data, usefilename)
                    attachments.append(eatt)
                    atag.replaceWithChildren()  # kill the actual link

        # now, put the pieces together
        html = html.prettify().decode('utf-8')
        msgHtml = MIMEText(html, 'html', 'utf-8')
        if not iimgs and not attachments:
            # pure HTML version
            msg = msgHtml
        else:
            # inline images but no attachments
            #   multipart/related
            #     text/html with html text
            #     image/xxx with inline images
            # OR
            # email with inline images + attachment
            #   multipart/mixed
            #     text/html with the html version
            #     image/gif with the picture
            #     application/xxx with word document
            if attachments:
                msg = MIMEMultipart('mixed', type='text/html')
            else:
                msg = MIMEMultipart('related', type='text/html')
            del msgHtml['MIME-Version']
            msg.attach(msgHtml)

            # attach images if any
            for (url, (cid, data)) in iimgs.items():
                m = MIMEImage(data)
                m.add_header('Content-ID', '<%s>' % cid)
                fn = os.path.basename(url).encode('utf-8')
                m.add_header('Content-Disposition', 'inline',
                             filename=('utf-8', '', fn))
                del m['MIME-Version']
                msg.attach(m)

            # attach attachments if any
            for attachment in attachments:
                del attachment['MIME-Version']
                msg.attach(attachment)

        # now for the general headers
        dt = self.mp['date']
        if self.mp.get('time', None):
            dt += ' ' + self.mp['time']
        else:
            if dt == time.strftime('%d-%m-%Y'):  # today
                # use the current time, but never later than noon
                ts = time.strftime('%H:%M:%S')
                if ts > '12:00:00':
                    ts = '12:00:00'
                dt += ' ' + ts
            else:
                dt += ' 12:00:00'
        dt = time.strptime(dt, '%d-%m-%Y %H:%M:%S')
        dt = email.utils.formatdate(time.mktime(dt), True)
        msg['Received'] = ('from %s ([127.0.0.1] helo=%s) '
                           'by %s with smtp (fskintra) for %s; %s') % (
                               hostname, hostname, hostname,
                               config.EMAIL, dt)
        msg['Date'] = dt

        title = self.mp['title']
        if self.mp['childname']:
            title = u'[%s] %s' % (self.mp['childname'], title)
        msg['Subject'] = Header(title, 'utf-8', 60)
        if 'sender' in self.mp and self.mp['sender']:
            sender = u'Skoleintra - %s' % self.mp['sender']
        else:
            sender = u'Skoleintra'
        sender = headerEncodeField(sender) + u' <%s>' % config.SENDER
        msg['From'] = sender
        msg['To'] = config.EMAIL

        # other tags just for ourselves
        keys = 'mid,md5'.split(',')
        for key in keys:
            if self.mp.get(key, None):
                kkey = 'X-skoleintra-%s' % key
                msg[kkey] = Header(self.mp[key], 'utf-8', header_name=kkey)

        self._email = msg
        return msg
def parseFrontpageItem(cname, div): '''Parse a single frontpage news item''' # Do we have any comments? comments = div.find('div', 'sk-news-item-comments') cdiv = u'' if comments: global c # Comments are enabled txt = comments.text.strip() if u'tilføj' not in txt.lower(): m = re.match(ur'.*vis (\d+) kommentar.*', txt.lower()) assert(m) nc = int(m.group(1)) if nc > 0: suff = '/news/pins/%s/comments' % div['data-feed-item-id'] url = schildren.getChildURL(cname, suff) bs = surllib.skoleGetURL(url, asSoup=True, postData={'_': str(nc)}) cdiv = unicode(bs.find('div', 'sk-comments-container')) cdiv = u'<br>' + cdiv author = div.find('div', 'sk-news-item-author') body = div.find('div', 'sk-news-item-content') # trim the body a bit body = sbs4.copy(body) # make a copy as we look for attachments later for e in body.select('.sk-attachments-list, .sk-news-item-comments'): e.extract() for e in body.select('.h-fnt-bd'): e['style'] = 'font-weight: bold' for e in body.select('div'): # remove empty divs contents = u''.join(map(unicode, e.children)).strip() if not contents: e.extract() # Trim extra white space - sometimes unecessary linebreaks are introduced sbs4.trimSoup(body) msg = semail.Message(cname, SECTION, unicode(body)+cdiv) for e in body.select('span, strong, b, i'): e.unwrap() sbs4.condenseSoup(body) title = body.get_text(u'\n', strip=True).strip().split(u'\n')[0] title = title.replace(u'\xa0', u' ').strip() title = u' '.join(title.rstrip(u' .').split()) msg.setTitle(title, True) msg.setMessageID(div['data-feed-item-id']) msg.setSender(author.span.text) # Find list of recipients author.span.extract() # Remove author for tag in [ author.span, # Remove author author.find('span', 'sk-news-item-for'), # Remove 'til' author.find('span', 'sk-news-item-and'), # Remove ' og ' author.find('a', 'sk-news-show-more-link')]: if tag: tag.extract() recp = re.sub(ur'\s*(,| og )\s*', ',', author.text.strip()) recp = recp.split(u',') msg.setRecipient(recp) msg.setDateTime(div.find('div', 
'sk-news-item-timestamp').text) # Do we have any attachments? divA = div.find('div', 'sk-attachments-list') if divA: for att in (divA.findAll('a') or []): url = att['href'] text = att.text.strip() msg.addAttachment(url, text) return msg
def skoleFrontpage():
    '''Log in, fetch the front page and hand each category to its handler.

    The front page is expected to contain exactly one main table. Its
    nested tables are grouped under the titles found by _getTitle, and
    each group is dispatched by title (cover picture, BBB, news, other).
    '''
    surllib.skoleLogin()

    config.log('Behandler forsiden')
    url = 'http://%s/Infoweb/Fi2/Forside.asp' % config.HOSTNAME
    data = surllib.skoleGetURL(url, asSoup=True, noCache=True)

    br = surllib.getBrowser()
    aurl = br.geturl()
    if u'Personoplysninger.asp' in aurl:
        # We are actually asked to confirm our personal data
        config.log(u'Bekræfter først vores personlige data')
        skoleConfirmPersonalData(data)
        data = surllib.skoleGetURL(url, asSoup=True, noCache=True)

    # find main table
    maint = []
    for mt in data.findAll('table'):
        if mt.findParents('table') or mt.has_key('bgcolor'):
            continue
        txt = mt.text
        if len(txt) < 30 and txt.lower().startswith(u'forældreintra for '):
            continue  # just the title
        maint.append(mt)
    assert len(maint) == 1  # assume exactly one main table
    maint = maint[0]

    # find interesting table tags
    itags = []
    for tag in maint:
        for ttag in tag.findAll('table'):
            if ttag.text:
                itags.append(ttag)

    # group the tables as (title, [tables following that title])
    g = []
    for itag in itags:
        t = _getTitle(itag)
        if t is None:
            # not a title
            if not g:
                # In some cases (slideshows), the real title may be missing
                g.append((itags[0].text, []))
            g[-1][1].append(itag)
        else:
            # we have a new title
            g.append((t, []))

    for (t, xs) in g:
        ignore = len(xs) == 0 or t in TITLE_IGNORE
        config.log(u'Kategori [%s]%s' %
                   (t, ' (hoppes over)' if ignore else ''))
        if ignore:
            continue

        if t == TITLE_COVERPIC:
            assert len(xs) == 1  # exactly one cover picture
            skoleCoverPic(xs[0])
        elif t == TITLE_BBB:
            # BBB news are split
            # ignore first table which is a wrapper around all entries
            # (explicit loop: map() for side effects does nothing where
            # map is lazy)
            for x in xs[1:]:
                skoleFrontBBB(x)
        elif t == TITLE_NEWS:
            # News from...
            skoleNewsFrom(xs)
        else:
            # send msg if something has changed
            for x in xs:
                skoleOtherStuff(t, x)
def asEmail(self):
    '''Return this message as a MIME e-mail (built once, then cached).

    The message fields (including cc) are rendered into an HTML page,
    images referenced from it are inlined, on-site links are fetched and
    added as attachments, and the usual headers are set.
    '''
    if self._email:
        return self._email
    self.prepareMessage()

    hostname = socket.getfqdn()  # used below in a few places

    mpp = self.mp.copy()

    def wrapOrZap(key, title):
        # Wrap a non-empty field in a <p>, otherwise blank it out
        val = self.mp.get(key, None)
        if val:
            mpp[key] = "<p class='%s' style='margin: 0;'>%s: %s</p>\n"
            mpp[key] %= (key, title, val)
        else:
            mpp[key] = ''

    wrapOrZap('sender', 'Fra')
    wrapOrZap('recipient', 'Til')
    wrapOrZap('cc', 'Kopi til')
    if mpp.get('time', None):
        mpp['ttime'] = u' ' + mpp['time']
    else:
        mpp['ttime'] = u''

    # create initial HTML version
    html = u'''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" /> <title>%(title)s</title> </head> <body style='font-family: Verdana,Arial,Helvetica'> <h1>%(title)s</h1> <div class='meta' style='background-color: #eaeaea; color: #000; padding: 5px; margin: 0 0 10px 0;'> %(sender)s%(recipient)s%(cc)s <p class='date' style='margin: 0;'>Dato: %(date)s%(ttime)s</p> </div> <div class='text'> %(nicehtml)s </div> </body> </html> '''
    html %= mpp
    html = BeautifulSoup.ICantBelieveItsBeautifulSoup(html)

    # first look for inline images (if any)
    # iimgs: mapping from URL to (cid, binary string contents)
    iimgs = {}
    for imgtag in html.findAll('img'):
        if not imgtag.has_key('src'):
            continue  # ignore
        url = imgtag['src']
        if url.lower().startswith('data:'):
            # ignore 'inline' images
            continue
        elif not url:
            # ignore empty URLs
            continue
        if url not in iimgs:
            try:
                data = surllib.skoleGetURL(url, False)
            except urllib2.URLError:
                # could not fetch URL for some reason - ignore
                continue
            # is this actually an image?
            if not imghdr.what('', data):
                continue  # ignore
            cid = 'image%d-%f@%s' % (len(iimgs) + 1, time.time(), hostname)
            iimgs[url] = (cid, data)
        cid, _ = iimgs[url]

        imgtag['src'] = 'cid:' + cid

    # next, handle attachments
    # attachments: email attachments ready for attachment :)
    attachments = []
    for atag in html.findAll('a'):
        try:
            url = atag['href']
        except KeyError:
            atag.replaceWithChildren()  # kill the "broken" link
            continue
        if 'Tilmelding/Oversigt.asp' in url:
            atag.replaceWithChildren()  # kill link
            continue
        if url.startswith('/') or config.HOSTNAME in url:  # onsite!
            data = None
            try:
                data = surllib.skoleGetURL(url, False)
            except Exception:
                # unable to fetch URL - best effort, but do not swallow
                # KeyboardInterrupt/SystemExit like a bare except would
                config.log(
                    u'%s: Kan ikke hente flg. URL: %s' %
                    (self.mp['title'] if self.mp['title'] else self, url))
            if data:
                if atag.has_key('usefilename'):
                    usefilename = atag['usefilename']
                else:
                    usefilename = None
                eatt = generateMIMEAttachment(url, data, usefilename)
                attachments.append(eatt)
                atag.replaceWithChildren()  # kill the actual link

    # now, put the pieces together
    html = html.prettify().decode('utf-8')
    msgHtml = MIMEText(html, 'html', 'utf-8')
    if not iimgs and not attachments:
        # pure HTML version
        msg = msgHtml
    else:
        # inline images but no attachments
        #   multipart/related
        #     text/html with html text
        #     image/xxx with inline images
        # OR
        # email with inline images + attachment
        #   multipart/mixed
        #     text/html with the html version
        #     image/gif with the picture
        #     application/xxx with word document
        if attachments:
            msg = MIMEMultipart('mixed', type='text/html')
        else:
            msg = MIMEMultipart('related', type='text/html')
        del msgHtml['MIME-Version']
        msg.attach(msgHtml)

        # attach images if any
        for (url, (cid, data)) in iimgs.items():
            m = MIMEImage(data)
            m.add_header('Content-ID', '<%s>' % cid)
            fn = os.path.basename(url).encode('utf-8')
            m.add_header('Content-Disposition', 'inline',
                         filename=('utf-8', '', fn))
            del m['MIME-Version']
            msg.attach(m)

        # attach attachments if any
        for attachment in attachments:
            del attachment['MIME-Version']
            msg.attach(attachment)

    # now for the general headers
    dt = self.mp['date']
    if self.mp.get('time', None):
        dt += ' ' + self.mp['time']
    else:
        if dt == time.strftime('%d-%m-%Y'):  # today
            # use the current time, but never later than noon
            ts = time.strftime('%H:%M:%S')
            if ts > '12:00:00':
                ts = '12:00:00'
            dt += ' ' + ts
        else:
            dt += ' 12:00:00'
    dt = time.strptime(dt, '%d-%m-%Y %H:%M:%S')
    dt = email.utils.formatdate(time.mktime(dt), True)
    msg['Received'] = ('from %s ([127.0.0.1] helo=%s) '
                       'by %s with smtp (fskintra) for %s; %s') % (
                           hostname, hostname, hostname, config.EMAIL, dt)
    msg['Date'] = dt

    title = self.mp['title']
    if self.mp['childname']:
        title = u'[%s] %s' % (self.mp['childname'], title)
    msg['Subject'] = Header(title, 'utf-8', 60)
    if 'sender' in self.mp and self.mp['sender']:
        sender = u'Skoleintra - %s' % self.mp['sender']
    else:
        sender = u'Skoleintra'
    sender = headerEncodeField(sender) + u' <%s>' % config.SENDER
    msg['From'] = sender
    msg['To'] = config.EMAIL

    # other tags just for ourselves
    keys = 'mid,md5'.split(',')
    for key in keys:
        if self.mp.get(key, None):
            kkey = 'X-skoleintra-%s' % key
            msg[kkey] = Header(self.mp[key], 'utf-8', header_name=kkey)

    self._email = msg
    return msg
def docFindDocuments(bs, foldername="Dokumentarkiv"):
    """Input beautifulsoup with content from a page of documents

    Looks at this and all subfolders, and sends any new messages.
    foldername is the display path of the folder being listed; it is
    prefixed to titles when logging and when recursing."""
    trs = bs.findAll("tr")

    for line in trs:
        # only rows with a class starting with "linje" are entries
        if not line.has_key("class"):
            continue
        if not [c for c in line["class"].split() if c.startswith("linje")]:
            continue

        links = line.findAll("a")
        assert len(links) >= 2

        # find file type
        ext = links[0].img["src"].split("/")[-1][2:-4].lower()

        # find name of file
        title = links[1].text
        ltitle = foldername + " / " + title

        # find url
        url = links[0]["href"]
        config.log(u"Kigger på dokument url: %s" % url, 3)
        m = re.match(r"javascript:visdokument\((\d+),'([^']+)'\).*", url)
        if m:
            url = m.group(2)
        elif "visdokument" in url.lower():
            url = URL_DOC + re.search(r".*?(\d+)", links[0]["href"]).group(1)
        elif links[0].has_key("onclick") and "visdok" in links[0]["onclick"]:
            pass  # href is actually the file url
        else:
            assert "Dokliste" in url
        # NOTE(review): the quoting is applied to every url here; the
        # flattened source does not show whether it belonged to the
        # else-branch only - confirm against the original layout.
        url = urllib.quote(url.encode("iso-8859-1"), safe=":/?=&%")

        # find date
        dts = line.findAll("td", width="18%")
        assert len(dts) == 1 and dts[0].text  # exactly one date
        date = dts[0].text

        # now do stuff
        if "Dokliste" in url:
            # this is a subfolder

            # first look at (potentially cached version)
            suburl = URL_PREFIX + url
            subbs = surllib.skoleGetURL(suburl, True)

            # date is day-month-year; datetime.date wants year, month, day
            subdate = datetime.date(
                *[int(part) for part in reversed(date.split("-"))])
            if subbs.cachedate <= subdate or subbs.cacheage >= 1.9:
                # cached version is too old - refetch
                subbs = surllib.skoleGetURL(suburl, True, True)
                config.log(u"Kigger på folderen %s" % title)
            else:
                config.log(u"Kigger på folderen %s (fra cache)" % title)

            docFindDocuments(subbs, ltitle)
        else:
            # this is an actual document
            config.log(u"Kigger på dokumentet %s" % ltitle)

            # Create HTML snippet
            html = u"<p>Nyt dokument: <a href=''>%s</a></p>" % ltitle
            h = surllib.beautify(html)
            h.a["href"] = url
            h.a["usefilename"] = title + "." + ext

            msg = semail.Message("documents", h)
            msg.setTitle(u"%s" % title)
            msg.setDate(date)
            msg.maybeSend()
def _findConversations(main):
    '''Return the 'Conversations' list embedded as JSON in a div attribute.

    Looks at attributes whose name contains 'message' and whose value is
    at least 100 long; returns None when none holds a JSON object with a
    non-empty 'Conversations' entry.
    '''
    for d in main.findAll('div'):
        for a in d.attrs:
            if 'message' not in a.lower() or len(d[a]) < 100:
                continue
            try:
                jsn = json.loads(d[a])
            except ValueError:
                continue
            if isinstance(jsn, dict) and jsn.get('Conversations'):
                # Stop at the first hit so a later div cannot overwrite it
                return jsn['Conversations']
    return None


def parseMessages(cname, bs):
    '''Look for new messages in each conversation.

    Returns a list with one message (built by msgFromJson) for every
    message that semail.hasSentMessage does not know yet; an empty list
    when no conversations are found.
    '''
    # Look for a div with a very long attribute with json
    main = bs.find('div', 'sk-l-content-wrapper')
    conversations = _findConversations(main)
    if not conversations:
        config.clog(cname, 'Ingen beskeder fundet?!?', -1)
        return []

    emsgs = []
    for i, c in enumerate(conversations):
        tid = c.get('ThreadId')
        # unicode(None) would be the truthy u'None', which would defeat
        # the check below - map a missing id to the empty string instead
        rawLmid = c.get('LatestMessageId')
        lmid = unicode(rawLmid) if rawLmid is not None else u''
        if not tid:
            # ThreadId can be empty if this is a msg to all students
            tid = ''
        if not lmid:
            config.clog(cname, u'Noget galt i tråd #%d %r %r' %
                        (i, tid, lmid), -1)
            continue
        if semail.hasSentMessage(tp=SECTION, mid=(tid, lmid)):
            continue

        # This last messages has not been seen - load the entire conversation
        if tid:
            suffix = (
                '/messages/conversations/loadmessagesforselectedconversation' +
                '?threadId=' + tid +
                '&takeFromRootMessageId=' + lmid +
                '&takeToMessageId=0' +
                '&searchRequest=')
        else:
            suffix = (
                '/messages/conversations/getmessageforthreadlessconversation' +
                '?messageId=' + lmid)
        curl = schildren.getChildURL(cname, suffix)
        data = surllib.skoleGetURL(curl, asSoup=False, noCache=True,
                                   addTimeSuffix=True)

        try:
            jsn = json.loads(data)
        except ValueError:
            config.clog(
                cname,
                'Kan ikke indlæse besked-listen i tråd %d %r %r' %
                (i, tid, lmid), -1)
            continue

        # a threadless conversation yields a single message object
        msgs = jsn if tid else [jsn]
        assert isinstance(msgs, list)
        for jsn in msgs[::-1]:
            mid = unicode(jsn.get('Id'))
            if semail.hasSentMessage(tp=SECTION, mid=(tid, mid)):
                continue
            # Generate new messages with this content
            emsgs.append(msgFromJson(cname, jsn, tid))
    return emsgs
def skoleFrontpage():
    '''Log in, fetch the front page and hand each category to its handler.

    The front page is expected to contain exactly one main table. Its
    nested tables are grouped under the titles found by _getTitle, and
    each group is dispatched by title (cover picture, BBB, news, other).
    '''
    surllib.skoleLogin()

    config.log('Behandler forsiden')
    url = 'http://%s/Infoweb/Fi2/Forside.asp' % config.HOSTNAME
    data = surllib.skoleGetURL(url, asSoup=True, noCache=True)

    br = surllib.getBrowser()
    aurl = br.geturl()
    if u'Personoplysninger.asp' in aurl:
        # We are actually asked to confirm our personal data
        config.log(u'Bekræfter først vores personlige data')
        skoleConfirmPersonalData(data)
        data = surllib.skoleGetURL(url, asSoup=True, noCache=True)

    # find main table
    maint = []
    for mt in data.findAll('table'):
        if mt.findParents('table') or mt.has_key('bgcolor'):
            continue
        txt = mt.text
        if len(txt) < 30 and txt.lower().startswith(u'forældreintra for '):
            continue  # just the title
        maint.append(mt)
    assert len(maint) == 1  # assume exactly one main table
    maint = maint[0]

    # find interesting table tags
    itags = []
    for tag in maint:
        for ttag in tag.findAll('table'):
            if ttag.text:
                itags.append(ttag)

    # group the tables as (title, [tables following that title])
    g = []
    for itag in itags:
        t = _getTitle(itag)
        if t is None:
            # not a title
            if not g:
                # In some cases (slideshows), the real title may be missing
                g.append((itags[0].text, []))
            g[-1][1].append(itag)
        else:
            # we have a new title
            g.append((t, []))

    for (t, xs) in g:
        ignore = len(xs) == 0 or t in TITLE_IGNORE
        config.log(u'Kategori [%s]%s' %
                   (t, ' (hoppes over)' if ignore else ''))
        if ignore:
            continue

        if t == TITLE_COVERPIC:
            assert len(xs) == 1  # exactly one cover picture
            skoleCoverPic(xs[0])
        elif t == TITLE_BBB:
            # BBB news are split
            # ignore first table which is a wrapper around all entries
            # (explicit loop: map() for side effects does nothing where
            # map is lazy)
            for x in xs[1:]:
                skoleFrontBBB(x)
        elif t == TITLE_NEWS:
            # News from...
            skoleNewsFrom(xs)
        else:
            # send msg if something has changed
            for x in xs:
                skoleOtherStuff(t, x)