def nicehtml(html):
    """Return *html* repaired and pretty-printed as a unicode string.

    If the first ``<li>`` tag occurs before any ``<ul>``/``<ol>`` tag (or
    there is no such list tag at all), the text ``'</p><ul>'`` is inserted
    immediately in front of that ``<li>``. The result is then parsed with
    ``BeautifulSoup.ICantBelieveItsBeautifulSoup`` and the ``prettify()``
    output is decoded from UTF-8.
    """
    # \b stops the patterns from matching longer tag names: without it
    # '<li' also matches '<link ...>' and a stray '<ul>' would be inserted.
    first_list = re.search(r'<[ou]l\b', html, re.IGNORECASE)
    first_item = re.search(r'<li\b', html, re.IGNORECASE)
    if first_item and (not first_list
                       or first_item.start() < first_list.start()):
        # the first list item is not inside a list - open one before it
        pos = first_item.start()
        html = html[:pos] + '</p><ul>' + html[pos:]

    soup = BeautifulSoup.ICantBelieveItsBeautifulSoup(html)
    return soup.prettify().decode('utf-8')
def asEmail(self):
    """Build this message as an email object and return it.

    The result is cached in ``self._email``; later calls return the cached
    object without rebuilding it.

    Steps performed:

    1. ``self.prepareMessage()`` is called and a copy of ``self.mp`` is used
       to fill in the HTML template (``self.mp`` is presumably populated by
       ``prepareMessage`` - confirm against the class).
    2. Every ``<img>`` whose ``src`` can be fetched with
       ``surllib.skoleGetURL`` and is recognised by ``imghdr`` is turned
       into an inline ``cid:`` image.
    3. Every ``<a>`` pointing to the site itself (relative URL or one
       containing ``config.HOSTNAME``) is fetched and converted into an
       attachment; links without ``href`` and links to
       ``Tilmelding/Oversigt.asp`` are replaced by their children.
    4. The parts are combined (plain ``text/html``, ``multipart/related``
       or ``multipart/mixed``) and the Received, Date, Subject, From, To
       and ``X-skoleintra-*`` headers are set.

    Returns:
        The ``MIMEText`` or ``MIMEMultipart`` message.
    """
    if self._email:
        return self._email
    self.prepareMessage()

    hostname = socket.getfqdn()  # used below in a few places

    mpp = self.mp.copy()

    def wrapOrZap(key, title):
        # Wrap a non-empty meta field in a <p>; blank it otherwise so the
        # template substitution below never sees a missing/None value.
        val = self.mp.get(key, None)
        if val:
            mpp[key] = "<p class='%s' style='margin: 0;'>%s: %s</p>\n"
            mpp[key] %= (key, title, val)
        else:
            mpp[key] = ''

    wrapOrZap('sender', 'Fra')
    wrapOrZap('recipient', 'Til')
    wrapOrZap('cc', 'Kopi til')
    if mpp.get('time', None):
        mpp['ttime'] = u' ' + mpp['time']
    else:
        mpp['ttime'] = u''

    # create initial HTML version
    html = u'''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
  <title>%(title)s</title>
</head>
<body style='font-family: Verdana,Arial,Helvetica'>
<h1>%(title)s</h1>
<div class='meta' style='background-color: #eaeaea; color: #000; padding: 5px; margin: 0 0 10px 0;'>
  %(sender)s%(recipient)s%(cc)s
  <p class='date' style='margin: 0;'>Dato: %(date)s%(ttime)s</p>
</div>
<div class='text'>
  %(nicehtml)s
</div>
</body>
</html>
'''
    html %= mpp
    html = BeautifulSoup.ICantBelieveItsBeautifulSoup(html)

    # first look for inline images (if any)
    # iimgs: mapping from URL to (cid, binary string contents)
    iimgs = {}
    for imgtag in html.findAll('img'):
        url = imgtag.get('src')
        if not url:
            # ignore missing or empty URLs
            continue
        if url.lower().startswith('data:'):
            # ignore 'inline' images
            continue
        if url not in iimgs:
            try:
                data = surllib.skoleGetURL(url, False)
            except urllib2.URLError:
                # could not fetch URL for some reason - ignore
                continue
            # is this actually an image?
            if not imghdr.what('', data):
                continue  # ignore
            cid = 'image%d-%f@%s' % (len(iimgs) + 1, time.time(), hostname)
            iimgs[url] = (cid, data)
        cid, _ = iimgs[url]

        imgtag['src'] = 'cid:' + cid

    # next, handle attachments
    # attachments: email attachments ready for attachment :)
    attachments = []
    for atag in html.findAll('a'):
        try:
            url = atag['href']
        except KeyError:
            atag.replaceWithChildren()  # kill the "broken" link
            continue
        if 'Tilmelding/Oversigt.asp' in url:
            atag.replaceWithChildren()  # kill link
            continue
        if url.startswith('/') or config.HOSTNAME in url:  # onsite!
            data = None
            try:
                data = surllib.skoleGetURL(url, False)
            except Exception:
                # Unable to fetch URL. Best effort: log and keep the link.
                # Deliberately not a bare "except:" so that
                # KeyboardInterrupt/SystemExit still propagate.
                config.log(u'%s: Kan ikke hente flg. URL: %s' %
                           (self.mp['title'] if self.mp['title'] else self,
                            url))
            if data:
                usefilename = atag.get('usefilename', None)
                eatt = generateMIMEAttachment(url, data, usefilename)
                attachments.append(eatt)
                atag.replaceWithChildren()  # kill the actual link

    # now, put the pieces together
    html = html.prettify().decode('utf-8')
    msgHtml = MIMEText(html, 'html', 'utf-8')
    if not iimgs and not attachments:
        # pure HTML version
        msg = msgHtml
    else:
        # inline images but no attachments
        #   multipart/related
        #     text/html with html text
        #     image/xxx with inline images
        # OR
        # email with inline images + attachment
        #   multipart/mixed
        #     text/html with html version
        #     image/gif with image
        #     application/xxx with word document
        if attachments:
            msg = MIMEMultipart('mixed', type='text/html')
        else:
            msg = MIMEMultipart('related', type='text/html')
        # sub-parts must not carry their own MIME-Version header
        del msgHtml['MIME-Version']
        msg.attach(msgHtml)

        # attach images if any
        for (url, (cid, data)) in iimgs.items():
            m = MIMEImage(data)
            m.add_header('Content-ID', '<%s>' % cid)
            fn = os.path.basename(url).encode('utf-8')
            m.add_header('Content-Disposition', 'inline',
                         filename=('utf-8', '', fn))
            del m['MIME-Version']
            msg.attach(m)

        # attach attachments if any
        for attachment in attachments:
            del attachment['MIME-Version']
            msg.attach(attachment)

    # now for the general headers
    dt = self.mp['date']
    if self.mp.get('time', None):
        dt += ' ' + self.mp['time']
    else:
        if dt == time.strftime('%d-%m-%Y'):  # today
            # no time given: use the current time, but never later than noon
            ts = time.strftime('%H:%M:%S')
            if ts > '12:00:00':
                ts = '12:00:00'
            dt += ' ' + ts
        else:
            dt += ' 12:00:00'
    dt = time.strptime(dt, '%d-%m-%Y %H:%M:%S')
    dt = email.utils.formatdate(time.mktime(dt), True)
    msg['Received'] = ('from %s ([127.0.0.1] helo=%s) '
                       'by %s with smtp (fskintra) for %s; %s') % (
                           hostname, hostname, hostname, config.EMAIL, dt)
    msg['Date'] = dt

    title = self.mp['title']
    if self.mp['childname']:
        title = u'[%s] %s' % (self.mp['childname'], title)
    msg['Subject'] = Header(title, 'utf-8', 60)
    if 'sender' in self.mp and self.mp['sender']:
        sender = u'Skoleintra - %s' % self.mp['sender']
    else:
        sender = u'Skoleintra'
    sender = headerEncodeField(sender) + u' <%s>' % config.SENDER
    msg['From'] = sender
    msg['To'] = config.EMAIL

    # other tags just for ourselves
    keys = 'mid,md5'.split(',')
    for key in keys:
        if self.mp.get(key, None):
            kkey = 'X-skoleintra-%s' % key
            msg[kkey] = Header(self.mp[key], 'utf-8', header_name=kkey)

    self._email = msg
    return msg
def asEmail(self): if self._email: return self._email self.prepareMessage() hostname = socket.getfqdn() # used below in a few places mpp = self.mp.copy() def wrapOrZap(key, title): val = self.mp.get(key, None) if val: mpp[key] = "<p class='%s' style='margin: 0;'>%s: %s</p>\n" mpp[key] %= (key, title, val) else: mpp[key] = '' wrapOrZap('sender', 'Fra') wrapOrZap('recipient', 'Til') if mpp.get('time', None): mpp['ttime'] = u' ' + mpp['time'] else: mpp['ttime'] = u'' # create initial HTML version html = u'''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" /> <title>%(title)s</title> </head> <body style='font-family: Verdana,Arial,Helvetica'> <h1>%(title)s</h1> <div class='meta' style='background-color: #eaeaea; color: #000; padding: 5px; margin: 0 0 10px 0;'> %(sender)s%(recipient)s <p class='date' style='margin: 0;'>Dato: %(date)s%(ttime)s</p> </div> <div class='text'> %(nicehtml)s </div> </body> </html> ''' html %= mpp html = BeautifulSoup.ICantBelieveItsBeautifulSoup(html) # first look for inline images (if any) # iimags: mapping from URL to (cid, binary string contents) iimgs = {} for imgtag in html.findAll('img'): url = imgtag['src'] if url.lower().startswith('data:'): # ignore 'inline' images continue elif not url: # ignore empty URLs continue if url not in iimgs: try: data = surllib.skoleGetURL(url, False) except urllib2.URLError, e: # could not fetch URL for some reason - ignore continue # is this actually an image? if not imghdr.what('', data): continue # ignore cid = 'image%d-%f@%s' % (len(iimgs) + 1, time.time(), hostname) iimgs[url] = (cid, data) cid, _ = iimgs[url] imgtag['src'] = 'cid:' + cid