# Check that a Header built from an ASCII chunk, a euc-jp chunk and an
# iso-8859-1 chunk encodes to the expected RFC 2047 encoded words, that
# decode_header() recovers the (string, charset) pairs, and that a long
# euc-jp Subject header is split into two iso-2022-jp encoded words that
# still decode back to the original bytes.
# NOTE(review): this definition is collapsed onto a single physical line; the
# triple-quoted expected values presumably contained line breaks (folded
# continuation lines) that are no longer visible -- confirm against the
# upstream test before reformatting this block.
def test_japanese_codecs(self): eq = self.ndiffAssertEqual j = Charset("euc-jp") g = Charset("iso-8859-1") h = Header("Hello World!") jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa' ghello = 'Gr\xfc\xdf Gott!' h.append(jhello, j) h.append(ghello, g) # BAW: This used to -- and maybe should -- fold the two iso-8859-1 # chunks into a single encoded word. However it doesn't violate the # standard to have them as two encoded chunks and maybe it's # reasonable <wink> for each .append() call to result in a separate # encoded word. eq( h.encode(), """\ Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?= =?iso-8859-1?q?Gr=FC=DF?= =?iso-8859-1?q?_Gott!?=""") eq(decode_header(h.encode()), [('Hello World!', None), ('\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'), ('Gr\xfc\xdf Gott!', 'iso-8859-1')]) long = 'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9' h = Header(long, j, header_name="Subject") # test a very long header enc = h.encode() # TK: splitting point may differ by codec design and/or Header encoding eq( enc, """\ =?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKGyhC?= =?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?=""") # TK: full decode comparison eq(h.__unicode__().encode('euc-jp'), long)
def mejl(tabelka, ustawieniaMejla):
    """Build an HTML results report and send it by e-mail over SMTP.

    ``tabelka`` maps a day to an iterable of that day's results; every day
    becomes a list item that holds an ordered list of its results.

    ``ustawieniaMejla`` is a settings mapping that must provide ``"od"``
    (sender address), ``"do"`` (list of recipient addresses) and ``"smtp"``
    (server host).  When ``"smtp_tls"`` is set, the connection is made to
    port 587, upgraded with STARTTLS and authenticated as the sender with
    the password returned by ``haslo()`` (which also receives the optional
    ``"smtp_password"`` setting).

    Errors raised by smtplib propagate to the caller; the connection is
    released in every case.
    """
    od, do, smtp = [ustawieniaMejla[x] for x in ("od", "do", "smtp")]

    # Collect the fragments and join once instead of re-building the whole
    # string on every iteration.
    czesci = [u"<h2>Wyniki</h2>", "<ul>"]
    for dzien in tabelka:
        czesci.extend(["<li>", uni(dzien), "<ol>"])
        for wynikDnia in tabelka[dzien]:
            czesci.extend(["<li>", uni(wynikDnia), "</li>"])
        czesci.append("</ol></li>")
    czesci.append(("</ul>" + "<br/>\r-- " + "<br/>\r %s")
                  % str(datetime.datetime.now()))
    tekst = u"".join(czesci)

    temat = "[MEDICOVER] %s" % (datetime.datetime.now())
    charset = Charset('utf-8')
    tresc = MIMEText(tekst.encode('utf-8'), 'html')
    tresc.set_charset(charset)
    tresc['From'] = od
    tresc['To'] = ", ".join(do)
    tresc['Subject'] = temat

    uzyjTls = ustawieniaMejla.get('smtp_tls')
    if uzyjTls:
        smtp_pass = haslo(smtp, od, ustawieniaMejla.get('smtp_password'))
        serwer = smtplib.SMTP(smtp, 587)
    else:
        serwer = smtplib.SMTP(smtp)
    try:
        if uzyjTls:
            serwer.starttls()
            serwer.login(od, smtp_pass)
        serwer.sendmail(od, do, tresc.as_string())
        serwer.quit()
    finally:
        # quit() already closes the socket on success, which makes this a
        # no-op; on failure it releases the socket without sending another
        # command that could mask the original exception.
        serwer.close()
def append(self, s, charset=None, errors='strict'):
    """Append a string to the MIME header.

    ``charset`` may be a Charset instance or the name of a character set;
    None (the default) selects the charset stored on the header.

    If ``s`` is a byte string, ``charset`` names its encoding: the string
    must decode with the charset's input codec and re-encode with its output
    codec, otherwise a UnicodeError is raised.  The original byte string is
    what gets stored.

    If ``s`` is a unicode string it is encoded with the first of us-ascii,
    the given charset and utf-8 that does not raise UnicodeError, and the
    resulting byte string is stored together with the charset that worked.

    ``errors`` is passed to every unicode()/encode() call.  Strings using
    the faux '8bit' charset are stored unchanged.
    """
    if charset is None:
        charset = self._charset
    elif not isinstance(charset, Charset):
        charset = Charset(charset)
    if charset != '8bit':
        if isinstance(s, str):
            # Fall back to us-ascii only when the charset names no codec.
            incodec = charset.input_codec or 'us-ascii'
            ustr = unicode(s, incodec, errors)
            outcodec = charset.output_codec or 'us-ascii'
            # Validation only: the result is discarded on purpose.
            ustr.encode(outcodec, errors)
        elif isinstance(s, unicode):
            for charset in (USASCII, charset, UTF8):
                try:
                    outcodec = charset.output_codec or 'us-ascii'
                    s = s.encode(outcodec, errors)
                    # Stop at the first charset that can represent s.
                    break
                except UnicodeError:
                    pass
    self._chunks.append((s, charset))
def decodeMessageAsString(msg):
    """Return ``msg`` as a string that contains no base64 encoded parts.

    ``msg`` is either a Message object or a string; a string is parsed into
    a message first.  All changes are made on a deep copy.  The From and
    Subject headers are replaced by the first fragment decode_header()
    yields for them, and every non-multipart part gets its decoded payload
    set again with a utf-8 charset that uses quoted-printable for bodies.
    """
    if isinstance(msg, str):
        msg = Parser().parsestr(msg)
    result = deepcopy(msg)

    # Headers may arrive encoded, e.g. From as
    # '=?utf-8?q?Site_Administrator_=3C=3E?='
    for header_name in ('From', 'Subject'):
        first_fragment = decode_header(result[header_name])[0][0]
        result.replace_header(header_name, first_fragment)

    utf8 = Charset('utf-8')
    utf8.header_encoding = SHORTEST
    utf8.body_encoding = QP
    utf8.output_charset = 'utf-8'

    for part in result.walk():
        if part.get_content_maintype() != "multipart":
            payload = part.get_payload(decode=1)
            del part['Content-Transfer-Encoding']
            part.set_payload(payload, utf8)

    return result.as_string()
def __init__(self, s=None, charset=None, maxlinelen=None, header_name=None,
             continuation_ws=' ', errors='strict'):
    """Set up the header, optionally seeding it with an initial value.

    ``charset`` (us-ascii when None) is stored as the default for later
    append() calls and is used for ``s`` when ``s`` is given.  ``maxlinelen``
    falls back to MAXLINELEN.  When ``header_name`` is given, the first line
    is shortened by its length plus 2; continuation lines are shortened by
    the width of ``continuation_ws`` with each tab expanded to SPACE8.
    ``errors`` is handed to append().
    """
    if charset is None:
        charset = USASCII
    if not isinstance(charset, Charset):
        charset = Charset(charset)
    self._charset = charset
    self._continuation_ws = continuation_ws
    ws_width = len(continuation_ws.replace('\t', SPACE8))
    self._chunks = []
    if s is not None:
        self.append(s, charset, errors)
    if maxlinelen is None:
        maxlinelen = MAXLINELEN
    first = maxlinelen
    if header_name is not None:
        first = maxlinelen - len(header_name) - 2
    self._firstlinelen = first
    self._maxlinelen = maxlinelen - ws_width
    return
class TestEncodeAddress(object):
    """ Address encoding tests

    See http://www.faqs.org/rfcs/rfc2822.html section 3.4.
    Address Specification.

        mailbox     =   name-addr / addr-spec
        name-addr   =   [display-name] angle-addr
        angle-addr  =   [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr
    """
    # Shared charset: the configured charset with quoted-printable used for
    # both headers and bodies.
    charset = Charset(config.charset)
    charset.header_encoding = QP
    charset.body_encoding = QP

    def testSimpleAddress(self):
        """ mail.sendmail: encode simple address: local@domain """
        address = u'local@domain'
        wanted = address.encode(config.charset)
        encoded = sendmail.encodeAddress(address, self.charset)
        assert encoded == wanted

    def testComposite(self):
        """ mail.sendmail: encode address: 'Phrase <local@domain>' """
        address = u'Phrase <local@domain>'
        wanted = str(address)
        encoded = sendmail.encodeAddress(address, self.charset)
        assert encoded == wanted

    def testCompositeUnicode(self):
        """ mail.sendmail: encode Unicode address: 'ויקי <local@domain>' """
        address = u'ויקי <local@domain>'
        # Only the phrase is header-encoded; the address part stays as is.
        phrase = str(Header(u'ויקי'.encode('utf-8'), self.charset))
        wanted = phrase + ' ' + '<local@domain>'
        encoded = sendmail.encodeAddress(address, self.charset)
        assert encoded == wanted

    def testEmptyPhrase(self):
        """ mail.sendmail: encode address with empty phrase: '<local@domain>' """
        address = u'<local@domain>'
        wanted = 'local@domain'
        encoded = sendmail.encodeAddress(address, self.charset)
        assert encoded == wanted

    def testEmptyAddress(self):
        """ mail.sendmail: encode address with empty address: 'Phrase <>'

        Let the smtp server handle this. We may raise error in such
        case, but we don't do error checking for mail addresses.
        """
        address = u'Phrase <>'
        wanted = str(address)
        encoded = sendmail.encodeAddress(address, self.charset)
        assert encoded == wanted

    def testInvalidAddress(self):
        """ mail.sendmail: encode invalid address 'Phrase <blah'

        Assume that this is a simple address. This address will
        probably cause an error when trying to send mail. Junk in, junk
        out.
        """
        address = u'Phrase <blah'
        wanted = str(address)
        encoded = sendmail.encodeAddress(address, self.charset)
        assert encoded == wanted
def to_cset_out(text, lcset):
    """Convert ``text`` to a byte string in the output charset of ``lcset``.

    ``text`` is either a unicode string or a byte string encoded in
    ``lcset``.  The target is the output charset that Charset reports for
    ``lcset``, or ``lcset`` itself when it reports none.  Characters that
    cannot be converted are replaced rather than raising.
    """
    target = Charset(lcset).get_output_charset() or lcset
    if not isinstance(text, unicode):
        # Byte strings go through unicode first.
        text = text.decode(lcset, errors='replace')
    return text.encode(target, errors='replace')
def make_header(decoded_seq, maxlinelen = None, header_name = None, continuation_ws = ' '):
    """Build a Header from a sequence of (string, charset) pairs.

    Each pair is appended to a new Header in order.  A charset that is
    neither None nor a Charset instance is wrapped in Charset first; None is
    passed through so that append() applies its own default.  The remaining
    arguments are forwarded to the Header constructor.
    """
    header = Header(maxlinelen=maxlinelen,
                    header_name=header_name,
                    continuation_ws=continuation_ws)
    for text, cs in decoded_seq:
        if cs is None or isinstance(cs, Charset):
            header.append(text, cs)
        else:
            header.append(text, Charset(cs))
    return header
def __init__(self, s=None, charset=None, maxlinelen=None, header_name=None,
             continuation_ws=' ', errors='strict'):
    """Create a MIME-compliant header that can hold several character sets.

    s is the optional initial header value, either a byte string or a
    Unicode string (see append() for how each is treated).  When it is
    None nothing is added and the value can be built up later with
    append().

    charset is used for s and also becomes the default for every later
    append() call that does not name a charset.  When omitted, us-ascii
    plays both roles.

    maxlinelen sets the maximum line length explicitly; it defaults to
    MAXLINELEN.  Pass the field name in header_name (e.g. `Subject') to
    make the first line shorter, accounting for the field header that is
    not part of s.

    continuation_ws must be RFC 2822 compliant folding whitespace (usually
    a space or a hard tab); it is prepended to continuation lines.

    errors is passed through to the append() call.
    """
    if charset is None:
        charset = USASCII
    if not isinstance(charset, Charset):
        charset = Charset(charset)
    self._charset = charset
    self._continuation_ws = continuation_ws
    # Width in columns of the continuation prefix, tabs expanded.
    prefix_columns = len(continuation_ws.replace('\t', SPACE8))
    # BAW: I believe `chunks' and `maxlinelen' should be non-public.
    self._chunks = []
    if s is not None:
        self.append(s, charset, errors)
    if maxlinelen is None:
        maxlinelen = MAXLINELEN
    # Without a field name the first line is as long as the others.  With
    # one, make room for the name plus the colon and the space after it.
    overhead = 0
    if header_name is not None:
        overhead = len(header_name) + 2
    self._firstlinelen = maxlinelen - overhead
    # Every later line starts with the continuation whitespace.
    self._maxlinelen = maxlinelen - prefix_columns
def append(self, s, charset=None, errors='strict'):
    """Append a string to the MIME header.

    charset, when given, is a Charset instance or the name of a character
    set (converted to a Charset instance).  None, the default, selects the
    charset passed to the constructor.

    s may be a byte string or a Unicode string.  For a byte string (i.e.
    isinstance(s, StringType) is true) charset is the encoding of that
    string, and UnicodeError is raised when it cannot be decoded with that
    charset.

    For a Unicode string charset is only a hint.  When an RFC 2822
    compliant header is produced using RFC 2047 rules, the string is
    encoded with the first of these charsets that does not provoke a
    UnicodeError: us-ascii, the charset hint, utf-8.

    errors is passed as the third argument to any unicode() or
    ustr.encode() call.
    """
    if charset is None:
        charset = self._charset
    elif not isinstance(charset, Charset):
        charset = Charset(charset)
    # The faux 8bit charset means the string is stored untouched.
    if charset != '8bit':
        if isinstance(s, StringType):
            # Round-trip check only: decode with the input codec, then
            # make sure the result can be encoded with the output codec
            # (which may differ).  The original byte string is kept.
            decoded = unicode(s, charset.input_codec or 'us-ascii', errors)
            decoded.encode(charset.output_codec or 'us-ascii', errors)
        elif isinstance(s, UnicodeType):
            # Find a reasonable output codec; the chunk stores the
            # resulting byte string together with the charset that worked.
            for charset in USASCII, charset, UTF8:
                try:
                    s = s.encode(charset.output_codec or 'us-ascii', errors)
                except UnicodeError:
                    continue
                break
            else:
                assert False, 'utf-8 conversion failed'
    self._chunks.append((s, charset))
# Build the outgoing message: call create_multipart_message() when
# self.attach is truthy, otherwise create_text_message().
# NOTE(review): the rest of this line consists of triple-quote delimiters
# around quoted-printable charset setup for self.msg.  With the line
# structure lost it is unclear whether that setup is live code or disabled
# inside a string, and the final `"""` is never closed in the visible text
# -- confirm against the original file before changing anything here.
def create_message(self): if self.attach: self.create_multipart_message() else: self.create_text_message() """comment """ charset = Charset(self.encoding) charset.header_encoding = QP charset.body_encoding = QP self.msg.set_charset(charset) """
def prepare_message(template, variables, encoding="utf-8"):
    r"""Return a prepared email.Message object.

    The template's line endings are normalised to CRLF, its variables are
    substituted, and the result is encoded and parsed into a message.
    Missing "to"/"from" headers are filled in from default placeholders, the
    message charset is set to ``encoding`` with quoted-printable transfer
    encoding, and the address fields are encoded.  An address that cannot be
    encoded raises InviteException.

    >>> template = (u"Subject: @SUBJECT@\n"+
    ...             u"From: @FROM@\n"+
    ...             u"To: @TO@\n"+
    ...             u"BCC: @FROM@\n\n"+
    ...             u"Hello, @GREETED@!")
    >>> variables = dict(SUBJECT="Test",
    ...                  FROM="*****@*****.**",
    ...                  TO="*****@*****.**",
    ...                  GREETED="World")
    >>> message = prepare_message(template, variables)
    >>> message["SUBJECT"] == variables["SUBJECT"]
    True
    >>> message["TO"] == variables["TO"]
    True
    >>> message["FROM"] == message["BCC"] == variables["FROM"]
    True
    >>> message.get_payload()
    'Hello, World!'
    """
    normalised = u"\r\n".join(template.splitlines())
    filled = replace_variables(normalised, variables)
    message = message_from_string(filled.encode(encoding))

    # Headers to add when the template does not supply them itself.
    fallback_headers = {"to": "@INVITEDEMAIL@", "from": "@INVITEREMAIL@"}
    for header, placeholder in fallback_headers.iteritems():
        if key_missing(message, header):
            resolved = replace_variables(placeholder, variables)
            if isinstance(resolved, unicode):
                resolved = resolved.encode(encoding)
            message[header] = resolved

    charset = Charset(encoding)
    charset.header_encoding = QP
    charset.body_encoding = QP
    message.set_charset(charset)

    for field in ("from", "to", "cc", "bcc"):
        try:
            encode_address_field(message, field, encoding, charset)
        except UnicodeEncodeError as error:
            raise InviteException("Invalid '{0}' address: {1}".format(
                field, error))
    return message


def key_missing(message, header):
    """Return True when ``message`` has no header named ``header``."""
    return header not in message
def _encode_address_string(text, charset):
    """Return a Header for the address in ``text``, encoding only the name.

    The actual address has to stay plain ASCII for most SMTP servers, so
    the text is split into name and address, and header encoding is applied
    to the name alone - and only when the name is not pure us-ascii and a
    charset was supplied.
    """
    name, addr = parseaddr(text)
    try:
        name.decode('us-ascii')
    except UnicodeDecodeError:
        if charset:
            name = Charset(charset).header_encode(name)
    result = Header()
    # Replace undecodable bytes instead of raising or emitting 8bit text.
    result.append(formataddr((name, addr)), errors='replace')
    return result
def mejl(self, tabelka, ustawieniaMejla):
    """Build an HTML results report and send it by e-mail over SMTP.

    ``tabelka`` is an iterable of rows.  The first item of a row is the day
    (it must support ``strftime``); the remaining items are strings that are
    joined with ", " to form the entry.  A heading is emitted whenever the
    day differs from the previous row's day.  Entries for which
    ``self.sprawdzCzyJuzSpotkalismy`` returns a false value are rendered in
    green.

    ``ustawieniaMejla`` is a settings mapping that must provide ``"od"``
    (sender address), ``"do"`` (list of recipient addresses) and ``"smtp"``
    (server host).  When ``"smtp_tls"`` is set, the connection is made to
    port 587, upgraded with STARTTLS and authenticated as the sender with
    the password returned by ``hasla.haslo()``.

    Errors raised by smtplib propagate to the caller; the connection is
    released in every case.
    """
    od, do, smtp = [ustawieniaMejla[x] for x in ("od", "do", "smtp")]

    # Collect the fragments and join once instead of re-building the whole
    # string on every iteration.
    czesci = [u"<h2>Wyniki</h2><ul>"]
    poprzedniDzien = ""
    for wiersz in tabelka:
        if wiersz[0] != poprzedniDzien:
            czesci.append("</ul><h4>%s</h4><ul>" % wiersz[0].strftime(
                "%A, %Y-%m-%d"))
        reprezentacja = self.pozbadzSiePolskichLiter(
            "%s" % (", ".join(wiersz[1:])))
        style = ""
        if not self.sprawdzCzyJuzSpotkalismy("%s" % wiersz[0] + reprezentacja):
            style = " style='color: green'"
        czesci.append("<li%s>%s</li>" % (style, reprezentacja))
        poprzedniDzien = wiersz[0]
    czesci.append("</ul><br/><br/>")
    czesci.append(("<br/>\r-- " + "<br/>\r %s")
                  % str(datetime.datetime.now()))
    tekst = u"".join(czesci)

    temat = "[%s] %s" % (self.naglowekWMejlu, datetime.datetime.now())
    charset = Charset('utf-8')
    tresc = MIMEText(tekst.encode('utf-8'), 'html')
    tresc.set_charset(charset)
    tresc['From'] = od
    tresc['To'] = ", ".join(do)
    tresc['Subject'] = temat

    uzyjTls = ustawieniaMejla.get('smtp_tls')
    if uzyjTls:
        smtp_pass = hasla.haslo(smtp, od,
                                ustawieniaMejla.get('smtp_password'))
        serwer = smtplib.SMTP(smtp, 587)
    else:
        serwer = smtplib.SMTP(smtp)
    try:
        if uzyjTls:
            serwer.starttls()
            serwer.login(od, smtp_pass)
        serwer.sendmail(od, do, tresc.as_string())
        serwer.quit()
    finally:
        # quit() already closes the socket on success, which makes this a
        # no-op; on failure it releases the socket without sending another
        # command that could mask the original exception.
        serwer.close()
def encode_header_param(param_text):
    """Return an RFC2047 representation of a header parameter value.

    The result can be assigned directly as a parameter value (e.g. through
    Message.set_param() or Message.add_header()).  RFC2822 assumes headers
    hold 7-bit characters only, so RFC2047 encoding is applied whenever the
    value is not plain ASCII.

    :param param_text: unicode or utf-8 encoded string with header value
    :rtype: string
    :return: the same 7-bit string when ``param_text`` is plain ASCII,
             otherwise an ASCII string holding the RFC2047 encoded text;
             an empty string for an empty value.
    """
    # Same logic as the encode_header() method.
    if not param_text:
        return ""
    as_utf8 = tools.ustr(param_text).encode('utf-8')
    as_ascii = try_coerce_ascii(as_utf8)
    if as_ascii:
        return as_ascii
    return Charset('utf8').header_encode(as_utf8)
def __init__(self, recip, sender, subject=None, text=None, lang=None):
    """Initialise a message from ``sender`` to ``recip``.

    ``recip`` is a single address or a list of addresses; the list form is
    joined with COMMASPACE for the To header, and ``self.recips`` always
    ends up a list.  ``lang`` selects the charset used for the payload and
    the Subject header; without it no charset is applied.  ``text`` becomes
    the payload when given, and a missing ``subject`` is replaced by
    '(no subject)'.
    """
    Message.__init__(self)
    if lang is None:
        charset = None
    else:
        charset = Charset(Utils.GetCharSet(lang))
    if text is not None:
        self.set_payload(text, charset)
    if subject is None:
        subject = '(no subject)'
    self['Subject'] = Header(subject, charset, header_name='Subject',
                             errors='replace')
    self['From'] = sender
    if not isinstance(recip, ListType):
        self['To'] = recip
        self.recips = [recip]
    else:
        self['To'] = COMMASPACE.join(recip)
        self.recips = recip
def make_header(decoded_seq, maxlinelen=None, header_name=None,
                continuation_ws=' '):
    """Create a Header from a sequence of pairs as returned by decode_header()

    decode_header() turns a header value string into a sequence of pairs of
    the form (decoded_string, charset), where charset is the string name of
    the character set.  This function does the reverse: it takes such a
    sequence and returns a Header instance.

    Optional maxlinelen, header_name, and continuation_ws are as in the
    Header constructor.
    """
    result = Header(maxlinelen=maxlinelen, header_name=header_name,
                    continuation_ws=continuation_ws)
    for chunk, charset in decoded_seq:
        # None stands for us-ascii and can be handed to append() as is;
        # anything else that is not yet a Charset gets wrapped.
        if not (charset is None or isinstance(charset, Charset)):
            charset = Charset(charset)
        result.append(chunk, charset)
    return result
def _init_pref_encoding(self):
    """Set ``self._charset`` to a utf-8 Charset with the configured encoding.

    The header and body transfer encodings follow the ``mime_encoding``
    option of the ``notification`` config section: base64, quoted-printable
    (``qp``), or ``none`` (shortest header encoding, no body encoding).  Any
    other value raises TracError.
    """
    from email.Charset import BASE64, QP, SHORTEST, Charset
    charset = self._charset = Charset()
    charset.input_charset = 'utf-8'
    charset.output_charset = 'utf-8'
    charset.input_codec = 'utf-8'
    charset.output_codec = 'utf-8'
    pref = self.config.get('notification', 'mime_encoding').lower()
    # preference -> (header encoding, body encoding)
    encodings = {
        'base64': (BASE64, BASE64),
        'qp': (QP, QP),
        'quoted-printable': (QP, QP),
        'none': (SHORTEST, None),
    }
    if pref not in encodings:
        raise TracError(_("Invalid email encoding setting: %(pref)s",
                          pref=pref))
    charset.header_encoding, charset.body_encoding = encodings[pref]
def _set_charset(self, mime):
    """Apply a utf-8 charset with the configured encoding to ``mime``.

    The header and body transfer encodings follow the ``mime_encoding``
    option of the ``notification`` config section; an unrecognised value
    leaves the Charset defaults in place.  Any Content-Transfer-Encoding
    header already on ``mime`` is removed before the charset is set.
    """
    from email.Charset import Charset, QP, BASE64, SHORTEST
    mime_encoding = self.config.get('notification', 'mime_encoding').lower()

    utf8 = Charset()
    for attribute in ('input_charset', 'output_charset',
                      'input_codec', 'output_codec'):
        setattr(utf8, attribute, 'utf-8')

    if mime_encoding in ('qp', 'quoted-printable'):
        utf8.header_encoding = QP
        utf8.body_encoding = QP
    elif mime_encoding == 'base64':
        utf8.header_encoding = BASE64
        utf8.body_encoding = BASE64
    elif mime_encoding == 'none':
        utf8.header_encoding = SHORTEST
        utf8.body_encoding = None

    del mime['Content-Transfer-Encoding']
    mime.set_charset(utf8)
def __setstate__(self, d):
    """Restore pickled state, upgrading data written by older versions.

    The attributes of the base class have changed over time, which matters
    when messages were queued before the email package was upgraded.  The
    schema updates are handled here: the state is stamped with VERSION and,
    if it was older, missing ``_charset`` / ``_default_type`` attributes
    are added and string charsets inside Header chunks are converted to
    Charset instances.
    """
    self.__dict__ = d
    # email 2.4.3 is known to be up-to-date.
    stored_version = d.get('__version__', (0, 0, 0))
    d['__version__'] = VERSION
    if stored_version >= VERSION:
        return
    # _charset appeared between email version 0.97 and 1.1.
    if '_charset' not in d:
        self._charset = None
    # _default_type appeared between v2.1 and v2.2.  There is no way to
    # know whether this message lives inside a multipart/digest, so
    # text/plain is the best available guess.
    if '_default_type' not in d:
        self._default_type = 'text/plain'
    # Header chunks used to allow both strings and Charsets; as of email
    # 2.4.3 only Charsets are allowed.
    rebuilt_headers = []
    any_header_changed = False
    for name, value in self._headers:
        if isinstance(value, Header):
            new_chunks = []
            chunk_changed = False
            for text, charset in value._chunks:
                if isinstance(charset, StringType):
                    charset = Charset(charset)
                    chunk_changed = True
                new_chunks.append((text, charset))
            if chunk_changed:
                value._chunks = new_chunks
                any_header_changed = True
        rebuilt_headers.append((name, value))
    if any_header_changed:
        self._headers = rebuilt_headers
def append(self, s, charset=None, errors='strict'):
    """Append ``s`` to the header together with the charset it uses.

    ``charset`` is a Charset instance or a charset name; None selects the
    header's default charset.  A byte string must decode with the charset's
    input codec and re-encode with its output codec (UnicodeError
    otherwise), and is stored unchanged.  A unicode string is encoded with
    the first of us-ascii, the given charset and utf-8 that works.
    ``errors`` is passed to the codec calls.  The '8bit' charset bypasses
    all checks.
    """
    if charset is None:
        charset = self._charset
    elif not isinstance(charset, Charset):
        charset = Charset(charset)
    if charset != '8bit':
        if isinstance(s, str):
            # Round-trip purely as validation; the result is not kept.
            decoded = unicode(s, charset.input_codec or 'us-ascii', errors)
            decoded.encode(charset.output_codec or 'us-ascii', errors)
        elif isinstance(s, unicode):
            for charset in (USASCII, charset, UTF8):
                try:
                    s = s.encode(charset.output_codec or 'us-ascii', errors)
                except UnicodeError:
                    continue
                break
    self._chunks.append((s, charset))
    return
def _init_pref_encoding(self):
    """Set ``self._charset`` according to ``self.mime_encoding``.

    'base64' and 'qp'/'quoted-printable' select that transfer encoding for
    headers and bodies with utf-8 output; 'none' disables both encodings and
    switches the output charset to ascii.  Any other value raises TracError.
    """
    charset = Charset()
    charset.input_charset = 'utf-8'
    self._charset = charset
    pref = self.mime_encoding.lower()
    if pref == 'none':
        charset.header_encoding = None
        charset.body_encoding = None
        charset.input_codec = None
        charset.output_charset = 'ascii'
        return
    transfer_encodings = {'base64': BASE64,
                          'qp': QP,
                          'quoted-printable': QP}
    if pref not in transfer_encodings:
        raise TracError(_('Invalid email encoding setting: %s' % pref))
    charset.header_encoding = transfer_encodings[pref]
    charset.body_encoding = transfer_encodings[pref]
    charset.output_charset = 'utf-8'
    charset.input_codec = 'utf-8'
    charset.output_codec = 'utf-8'
def _init_pref_encoding(self):
    """Set ``self._charset`` from the notification ``mime_encoding`` option.

    'base64' and 'qp'/'quoted-printable' select that transfer encoding for
    headers and bodies with utf-8 output; 'none' disables both encodings and
    switches the output charset to ascii.  Any other value raises TracError.
    """
    from email.Charset import Charset, QP, BASE64
    self._charset = Charset()
    self._charset.input_charset = 'utf-8'
    pref = self.env.config.get('notification', 'mime_encoding').lower()
    if pref in ('base64', 'qp', 'quoted-printable'):
        if pref == 'base64':
            transfer = BASE64
        else:
            transfer = QP
        self._charset.header_encoding = transfer
        self._charset.body_encoding = transfer
        self._charset.output_charset = 'utf-8'
        self._charset.input_codec = 'utf-8'
        self._charset.output_codec = 'utf-8'
    elif pref == 'none':
        self._charset.header_encoding = None
        self._charset.body_encoding = None
        self._charset.input_codec = None
        self._charset.output_charset = 'ascii'
    else:
        raise TracError('Invalid email encoding setting: %s' % pref)
def verpdeliver(mlist, msg, msgdata, envsender, failures, conn):
    """Deliver a personalised copy of ``msg`` to each recipient.

    For every address in ``msgdata['recips']`` the message is deep-copied,
    decorated, optionally given a per-recipient To header (when
    ``mlist.personalize == 2``) and an X-Mailman-Copy header, and then
    handed to bulkdeliver() as a delivery to a single recipient.

    When ``msgdata['verp']`` is set, the envelope sender is rewritten for
    each recipient with mm_cfg.VERP_FORMAT.  The VERP address is always
    derived from the ``envsender`` passed in, never from a previous
    recipient's VERP address.  Recipients without a domain are logged and
    skipped when VERPing.

    ``msgdata['recips']`` is overwritten with a one-element list for each
    recipient, as in the original implementation.
    """
    for recip in msgdata['recips']:
        # We now need to stitch together the message with its header and
        # footer.  If we're VERPing, we have to calculate the envelope sender
        # for each recipient.  Note that the list of recipients must be of
        # length 1.
        #
        # BAW: ezmlm includes the message number in the envelope, used when
        # sending a notification to the user telling her how many messages
        # they missed due to bouncing.  Neat idea.
        msgdata['recips'] = [recip]
        # Make a copy of the message and decorate + delivery that
        msgcopy = copy.deepcopy(msg)
        Decorate.process(mlist, msgcopy, msgdata)
        # Calculate the envelope sender, which we may be VERPing.  Keep it
        # in a per-recipient variable so that the caller's envsender stays
        # the base address for every iteration.
        recipsender = envsender
        if msgdata.get('verp'):
            bmailbox, bdomain = Utils.ParseEmail(envsender)
            rmailbox, rdomain = Utils.ParseEmail(recip)
            if rdomain is None:
                # The recipient address is not fully-qualified.  We can't
                # deliver it to this person, nor can we craft a valid verp
                # header.  I don't think there's much we can do except ignore
                # this recipient.
                syslog('smtp', 'Skipping VERP delivery to unqual recip: %s',
                       recip)
                continue
            d = {'bounces': bmailbox,
                 'mailbox': rmailbox,
                 'host'   : DOT.join(rdomain),
                 }
            recipsender = '%s@%s' % ((mm_cfg.VERP_FORMAT % d),
                                     DOT.join(bdomain))
        if mlist.personalize == 2:
            # When fully personalizing, we want the To address to point to the
            # recipient, not to the mailing list
            del msgcopy['to']
            name = None
            if mlist.isMember(recip):
                name = mlist.getMemberName(recip)
            if name:
                # Convert the name to an email-safe representation.  If the
                # name is a byte string, convert it first to Unicode, given
                # the character set of the member's language, replacing bad
                # characters for which we can do nothing about.  Once we have
                # the name as Unicode, we can create a Header instance for it
                # so that it's properly encoded for email transport.
                charset = Utils.GetCharSet(mlist.getMemberLanguage(recip))
                if charset == 'us-ascii':
                    # Since Header already tries both us-ascii and utf-8,
                    # let's add something a bit more useful.
                    charset = 'iso-8859-1'
                charset = Charset(charset)
                codec = charset.input_codec or 'ascii'
                if not isinstance(name, UnicodeType):
                    name = unicode(name, codec, 'replace')
                name = Header(name, charset).encode()
                msgcopy['To'] = formataddr((name, recip))
            else:
                msgcopy['To'] = recip
        # We can flag the mail as a duplicate for each member, if they've
        # already received this message, as calculated by Message-ID.  See
        # AvoidDuplicates.py for details.
        del msgcopy['x-mailman-copy']
        if recip in msgdata.get('add-dup-header', {}):
            msgcopy['X-Mailman-Copy'] = 'yes'
        # For the final delivery stage, we can just bulk deliver to a party of
        # one. ;)
        bulkdeliver(mlist, msgcopy, msgdata, recipsender, failures, conn)
# Build a text summary of the pending subscription requests and held posts
# of `mlist` and return it as a byte string, encoded (replacing characters
# that cannot be encoded) with the output codec of the Charset for the
# list's preferred language, or 'ascii' when that Charset names no codec.
# NOTE(review): locals such as `sender`, `date`, `subject` and `reason` look
# unused but are presumably substituted into the `$`-placeholders by `_()`
# -- confirm before removing any of them.  The triple-quoted template has
# also lost its line structure in this collapsed form.
def pending_requests(mlist): # Must return a byte string lcset = mlist.preferred_language.charset pending = [] first = True requestsdb = IListRequests(mlist) for request in requestsdb.of_type(RequestType.subscription): if first: pending.append(_('Pending subscriptions:')) first = False key, data = requestsdb.get_request(request.id) when = data['when'] addr = data['addr'] fullname = data['fullname'] passwd = data['passwd'] digest = data['digest'] lang = data['lang'] if fullname: if isinstance(fullname, unicode): fullname = fullname.encode(lcset, 'replace') fullname = ' (%s)' % fullname pending.append(' %s%s %s' % (addr, fullname, time.ctime(when))) first = True for request in requestsdb.of_type(RequestType.held_message): if first: pending.append(_('\nPending posts:')) first = False key, data = requestsdb.get_request(request.id) when = data['when'] sender = data['sender'] subject = data['subject'] reason = data['reason'] text = data['text'] msgdata = data['msgdata'] subject = Utils.oneline(subject, lcset) date = time.ctime(when) reason = _(reason) pending.append( _("""\ From: $sender on $date Subject: $subject Cause: $reason""")) pending.append('') # Coerce all items in pending to a Unicode so we can join them upending = [] charset = mlist.preferred_language.charset for s in pending: if isinstance(s, unicode): upending.append(s) else: upending.append(unicode(s, charset, 'replace')) # Make sure that the text we return from here can be encoded to a byte # string in the charset of the list's language. This could fail if for # example, the request was pended while the list's language was French, # but then it was changed to English before checkdbs ran. 
text = NL.join(upending) charset = Charset(mlist.preferred_language.charset) incodec = charset.input_codec or 'ascii' outcodec = charset.output_codec or 'ascii' if isinstance(text, unicode): return text.encode(outcodec, 'replace') # Be sure this is a byte string encodeable in the list's charset utext = unicode(text, incodec, 'replace') return utext.encode(outcodec, 'replace')
def send_i18n_digests(mlist, mboxfp):
    """Assemble the list's digest in MIME and RFC 1153 form and queue both.

    Every parseable message in the mailbox read from `mboxfp` goes into a
    multipart MIME digest and into a plain-text RFC 1153 digest.  Each digest
    starts with the masthead, the optional digest header and a table of
    contents, and ends with the optional digest footer.  Both are enqueued
    on the virgin queue; digest recipients whose delivery is enabled are
    split between the two by their DisableMime option.

    Side effects: `mlist.next_digest_number` is incremented and
    `mlist.one_last_digest` is cleared.  When the mailbox yields no
    parseable message the function returns early and does neither.
    """
    mbox = Mailbox(mboxfp)
    # Prepare common information (first lang/charset)
    lang = mlist.preferred_language
    lcset = Utils.GetCharSet(lang)
    lcset_out = Charset(lcset).output_charset or lcset
    # Common Information (contd)
    # NOTE(review): realname, volume and issue are not referenced directly;
    # they match the %(...) placeholders of the digestid template, so `_`
    # presumably interpolates them from this frame.  Keep the names.
    realname = mlist.real_name
    volume = mlist.volume
    issue = mlist.next_digest_number
    digestid = _('%(realname)s Digest, Vol %(volume)d, Issue %(issue)d')
    digestsubj = Header(digestid, lcset, header_name='Subject')
    # Set things up for the MIME digest.  Only headers not added by
    # CookHeaders need be added here.
    # Date/Message-ID should be added here also.
    mimemsg = Message.Message()
    mimemsg['Content-Type'] = 'multipart/mixed'
    mimemsg['MIME-Version'] = '1.0'
    mimemsg['From'] = mlist.GetRequestEmail()
    mimemsg['Subject'] = digestsubj
    mimemsg['To'] = mlist.GetListEmail()
    mimemsg['Reply-To'] = mlist.GetListEmail()
    mimemsg['Date'] = formatdate(localtime=1)
    mimemsg['Message-ID'] = Utils.unique_message_id(mlist)
    # Set things up for the rfc1153 digest
    plainmsg = StringIO()
    rfc1153msg = Message.Message()
    rfc1153msg['From'] = mlist.GetRequestEmail()
    rfc1153msg['Subject'] = digestsubj
    rfc1153msg['To'] = mlist.GetListEmail()
    rfc1153msg['Reply-To'] = mlist.GetListEmail()
    rfc1153msg['Date'] = formatdate(localtime=1)
    rfc1153msg['Message-ID'] = Utils.unique_message_id(mlist)
    separator70 = '-' * 70
    separator30 = '-' * 30
    # In the rfc1153 digest, the masthead contains the digest boilerplate plus
    # any digest header.  In the MIME digests, the masthead and digest header
    # are separate MIME subobjects.  In either case, it's the first thing in
    # the digest, and we can calculate it now, so go ahead and add it now.
    mastheadtxt = Utils.maketext(
        'masthead.txt',
        {
            'real_name': mlist.real_name,
            'got_list_email': mlist.GetListEmail(),
            'got_listinfo_url': mlist.GetScriptURL('listinfo', absolute=1),
            'got_request_email': mlist.GetRequestEmail(),
            'got_owner_email': mlist.GetOwnerEmail(),
        },
        mlist=mlist)
    # MIME
    masthead = MIMEText(mastheadtxt, _charset=lcset)
    masthead['Content-Description'] = digestid
    mimemsg.attach(masthead)
    # RFC 1153
    print >> plainmsg, mastheadtxt
    print >> plainmsg
    # Now add the optional digest header but only if more than whitespace.
    if re.sub(r'\s', '', mlist.digest_header):
        headertxt = decorate(mlist, mlist.digest_header, _('digest header'))
        # MIME
        header = MIMEText(headertxt, _charset=lcset)
        header['Content-Description'] = _('Digest Header')
        mimemsg.attach(header)
        # RFC 1153
        print >> plainmsg, headertxt
        print >> plainmsg
    # Now we have to cruise through all the messages accumulated in the
    # mailbox file.  We can't add these messages to the plainmsg and mimemsg
    # yet, because we first have to calculate the table of contents
    # (i.e. grok out all the Subjects).  Store the messages in a list until
    # we're ready for them.
    #
    # Meanwhile prepare things for the table of contents
    toc = StringIO()
    print >> toc, _("Today's Topics:\n")
    # Now cruise through all the messages in the mailbox of digest messages,
    # building the MIME payload and core of the RFC 1153 digest.  We'll also
    # accumulate Subject: headers and authors for the table-of-contents.
    messages = []
    msgcount = 0
    msg = mbox.next()
    while msg is not None:
        if msg == '':
            # It was an unparseable message
            msg = mbox.next()
            continue
        msgcount += 1
        messages.append(msg)
        # Get the Subject header
        msgsubj = msg.get('subject', _('(no subject)'))
        subject = Utils.oneline(msgsubj, lcset)
        # Don't include the redundant subject prefix in the toc
        mo = re.match(r'(re:? *)?(%s)' % re.escape(mlist.subject_prefix),
                      subject, re.IGNORECASE)
        if mo:
            subject = subject[:mo.start(2)] + subject[mo.end(2):]
        username = ''
        addresses = getaddresses([Utils.oneline(msg.get('from', ''), lcset)])
        # Take only the first author we find: the display name if there is
        # one, otherwise the bare address.
        if isinstance(addresses, ListType) and addresses:
            username = addresses[0][0]
            if not username:
                username = addresses[0][1]
        if username:
            # The format needs a %s conversion; a literal without one raises
            # TypeError as soon as a message has an author.
            username = ' (%s)' % username
        # Put count and Wrap the toc subject line
        wrapped = Utils.wrap('%2d. %s' % (msgcount, subject), 65)
        slines = wrapped.split('\n')
        # See if the user's name can fit on the last line
        if len(slines[-1]) + len(username) > 70:
            slines.append(username)
        else:
            slines[-1] += username
        # Add this subject to the accumulating topics; continuation lines
        # have their own leading whitespace stripped first.
        first = True
        for line in slines:
            if first:
                print >> toc, ' ', line
                first = False
            else:
                print >> toc, ' ', line.lstrip()
        # We do not want all the headers of the original message to leak
        # through in the digest messages.  For this phase, we'll leave the
        # same set of headers in both digests, i.e. those required in RFC 1153
        # plus a couple of other useful ones.  We also need to reorder the
        # headers according to RFC 1153.  Later, we'll strip out headers
        # for the specific MIME or plain digests.
        keeper = {}
        all_keepers = {}
        for header in (mm_cfg.MIME_DIGEST_KEEP_HEADERS +
                       mm_cfg.PLAIN_DIGEST_KEEP_HEADERS):
            all_keepers[header] = True
        all_keepers = all_keepers.keys()
        for keep in all_keepers:
            keeper[keep] = msg.get_all(keep, [])
        # Now remove all unkempt headers :)
        for header in msg.keys():
            del msg[header]
        # And add back the kept header in the RFC 1153 designated order
        for keep in all_keepers:
            for field in keeper[keep]:
                msg[keep] = field
        # And a bit of extra stuff
        msg['Message'] = repr(msgcount)
        # Get the next message in the digest mailbox
        msg = mbox.next()
    # Now we're finished with all the messages in the digest.  First do some
    # sanity checking and then on to adding the toc.
    if msgcount == 0:
        # Why did we even get here?
        return
    toctext = to_cset_out(toc.getvalue(), lcset)
    # MIME
    tocpart = MIMEText(toctext, _charset=lcset)
    # msgcount fills the %(msgcount)d placeholder (see the note on `_` above).
    tocpart['Content-Description'] = _(
        "Today's Topics (%(msgcount)d messages)")
    mimemsg.attach(tocpart)
    # RFC 1153
    print >> plainmsg, toctext
    print >> plainmsg
    # For RFC 1153 digests, we now need the standard separator
    print >> plainmsg, separator70
    print >> plainmsg
    # Now go through and add each message
    mimedigest = MIMEBase('multipart', 'digest')
    mimemsg.attach(mimedigest)
    first = True
    for msg in messages:
        # MIME.  Make a copy of the message object since the rfc1153
        # processing scrubs out attachments.
        mimedigest.attach(MIMEMessage(copy.deepcopy(msg)))
        # rfc1153
        if first:
            first = False
        else:
            print >> plainmsg, separator30
            print >> plainmsg
        # Use Mailman.Handlers.Scrubber.process() to get plain text
        try:
            msg = scrubber(mlist, msg)
        except Errors.DiscardMessage:
            print >> plainmsg, _('[Message discarded by content filter]')
            continue
        # Honor the default setting
        for h in mm_cfg.PLAIN_DIGEST_KEEP_HEADERS:
            if msg[h]:
                uh = Utils.wrap('%s: %s' % (h, Utils.oneline(msg[h], lcset)))
                uh = '\n\t'.join(uh.split('\n'))
                print >> plainmsg, uh
        print >> plainmsg
        # If decoded payload is empty, this may be multipart message.
        # -- just stringify it.
        payload = (msg.get_payload(decode=True)
                   or msg.as_string().split('\n\n', 1)[1])
        mcset = msg.get_content_charset('')
        if mcset and mcset != lcset and mcset != lcset_out:
            try:
                payload = unicode(payload, mcset, 'replace').encode(
                    lcset, 'replace')
            except (UnicodeError, LookupError):
                # TK: Message has something unknown charset.
                #     _out means charset in 'outer world'.
                payload = unicode(payload, lcset_out, 'replace').encode(
                    lcset, 'replace')
        print >> plainmsg, payload
        if not payload.endswith('\n'):
            print >> plainmsg
    # Now add the footer but only if more than whitespace.
    if re.sub(r'\s', '', mlist.digest_footer):
        footertxt = decorate(mlist, mlist.digest_footer, _('digest footer'))
        # MIME
        footer = MIMEText(footertxt, _charset=lcset)
        footer['Content-Description'] = _('Digest Footer')
        mimemsg.attach(footer)
        # RFC 1153
        # MAS: There is no real place for the digest_footer in an RFC 1153
        # compliant digest, so add it as an additional message with
        # Subject: Digest Footer
        print >> plainmsg, separator30
        print >> plainmsg
        print >> plainmsg, 'Subject: ' + _('Digest Footer')
        print >> plainmsg
        print >> plainmsg, footertxt
        print >> plainmsg
        print >> plainmsg, separator30
        print >> plainmsg
    # Do the last bit of stuff for each digest type
    signoff = _('End of ') + digestid
    # MIME
    # BAW: This stuff is outside the normal MIME goo, and it's what the old
    # MIME digester did.  No one seemed to complain, probably because you
    # won't see it in an MUA that can't display the raw message.  We've never
    # got complaints before, but if we do, just wax this.  It's primarily
    # included for (marginally useful) backwards compatibility.
    mimemsg.postamble = signoff
    # rfc1153
    print >> plainmsg, signoff
    print >> plainmsg, '*' * len(signoff)
    # Do our final bit of housekeeping, and then send each message to the
    # outgoing queue for delivery.
    mlist.next_digest_number += 1
    virginq = get_switchboard(mm_cfg.VIRGINQUEUE_DIR)
    # Calculate the recipients lists
    plainrecips = []
    mimerecips = []
    drecips = mlist.getDigestMemberKeys() + mlist.one_last_digest.keys()
    for user in mlist.getMemberCPAddresses(drecips):
        # user might be None if someone who toggled off digest delivery
        # subsequently unsubscribed from the mailing list.  Also, filter out
        # folks who have disabled delivery.
        if user is None or mlist.getDeliveryStatus(user) != ENABLED:
            continue
        # Otherwise, decide whether they get MIME or RFC 1153 digests
        if mlist.getMemberOption(user, mm_cfg.DisableMime):
            plainrecips.append(user)
        else:
            mimerecips.append(user)
    # Zap this since we're now delivering the last digest to these folks.
    mlist.one_last_digest.clear()
    # MIME
    virginq.enqueue(mimemsg,
                    recips=mimerecips,
                    listname=mlist.internal_name(),
                    isdigest=True)
    # RFC 1153
    rfc1153msg.set_payload(to_cset_out(plainmsg.getvalue(), lcset), lcset)
    virginq.enqueue(rfc1153msg,
                    recips=plainrecips,
                    listname=mlist.internal_name(),
                    isdigest=True)
def __init__(self, message=None, sequence=0, keepHeaders=[],
             lang=mm_cfg.DEFAULT_SERVER_LANGUAGE, mlist=None):
    """Set up the article from `message` and decode its body and headers.

    After delegating to the parent initializer this strips leading matches
    of REpat from the subject, optionally obscures the '@' in the author
    and email fields, records the lower-cased Content-Type and
    Content-Transfer-Encoding, and stores the body as a list of
    newline-terminated lines.

    `keepHeaders` is only passed through to the parent initializer here.
    Although `message` and `mlist` default to None, both are dereferenced
    below, so callers have to supply them.
    """
    self.__super_init(message, sequence, keepHeaders)
    self.prev = None
    self.next = None
    # Trim Re: from the subject line
    i = 0
    while i != -1:
        result = REpat.match(self.subject)
        if result:
            i = result.end(0)
            self.subject = self.subject[i:]
        else:
            i = -1
    # Useful to keep around
    self._lang = lang
    self._mlist = mlist

    if mm_cfg.ARCHIVER_OBSCURES_EMAILADDRS:
        # Avoid i18n side-effects.  Note that the language for this
        # article (for this list) could be different from the site-wide
        # preferred language, so we need to ensure no side-effects will
        # occur.  Think what happens when executing bin/arch.
        otrans = i18n.get_translation()
        try:
            i18n.set_language(lang)
            if self.author == self.email:
                self.author = self.email = re.sub('@', _(' at '),
                                                  self.email)
            else:
                self.email = re.sub('@', _(' at '), self.email)
        finally:
            # Always restore the translation that was active on entry.
            i18n.set_translation(otrans)

    # Snag the content-* headers.  RFC 1521 states that their values are
    # case insensitive.
    ctype = message.get('Content-Type', 'text/plain')
    cenc = message.get('Content-Transfer-Encoding', '')
    self.ctype = ctype.lower()
    self.cenc = cenc.lower()
    self.decoded = {}
    cset = Utils.GetCharSet(mlist.preferred_language)
    cset_out = Charset(cset).output_charset or cset
    if isinstance(cset_out, unicode):
        # email 3.0.1 (python 2.4) doesn't like unicode
        cset_out = cset_out.encode('us-ascii')
    charset = message.get_content_charset(cset_out)
    if charset:
        charset = charset.lower().strip()
        # Drop one layer of double quotes, then one layer of single quotes.
        # The length check keeps a value that is empty (after stripping) or
        # a lone quote character from raising IndexError.
        for quote in ('"', "'"):
            if (len(charset) >= 2
                    and charset[0] == quote and charset[-1] == quote):
                charset = charset[1:-1]
        try:
            body = message.get_payload(decode=True)
        except binascii.Error:
            body = None
        if body and charset != Utils.GetCharSet(self._lang):
            # decode body; an unknown or unusable charset discards it
            try:
                body = unicode(body, charset)
            except (UnicodeError, LookupError):
                body = None
        if body:
            self.body = [line + "\n" for line in body.splitlines()]

    self.decode_headers()
def process(mlist, msg, msgdata=None):
    """Handler entry point: detach attachments and link to them instead.

    Walks every part of `msg`.  text/plain, text/html and message/rfc822
    parts, multipart containers and parts carrying a Content-ID are left
    alone.  Any other non-empty, non-multipart part is saved to disk,
    uploaded unless 'disable_upload' is in `msgdata`, and has its payload
    reset.  When at least one part was detached, fix_msg() is called to add
    the links to the message.  `msg` is returned in every case.
    """
    global DEBUG
    if hasattr(mlist, 'debug'):
        DEBUG = mlist.debug
    debug('AttachmentMove Enter ' + '-' * 30)
    if msgdata is None:
        msgdata = {}
    # Directory handed to save_attachment().
    target_dir = 'attachments-moved'
    detached = []
    boundary = None
    # As we replace some content we will have to fight with encoding, so
    # work out the list's default charsets up front.
    lcset = Utils.GetCharSet(mlist.preferred_language)
    lcset_out = Charset(lcset).output_charset or lcset

    for part in msg.walk():
        ctype = part.get_content_type()
        partlen = len(part.get_payload())
        debug('met part : %s %d', ctype, partlen)
        # Textual parts and embedded messages are left alone.
        if ctype in ('text/plain', 'text/html', 'message/rfc822'):
            continue
        if partlen > 0 and not part.is_multipart():
            # We met an attachment.
            debug('> part is attachment %s', ctype)
            if 'Content-ID' in part:
                # Keep parts that carry a Content-ID.
                debug('> part as Content-ID %s', part['Content-ID'])
                continue
            debug('> detaching...')
            # Detach it, storing it locally and remotely.
            fname = get_attachment_fname(mlist, part)
            debug('get_attachment_fname:%s, type:%s', fname, type(fname))
            attachment = {
                'name': fname,
                'orig': fname,
                'size': sizeof_fmt(partlen),
            }
            debug('> att: %s', fname)
            # Save the attachment to disk; duplicate names are resolved at
            # this stage.
            path, url = save_attachment(mlist, part, target_dir)
            debug('> detached: %s %s', path, url)
            # Remote storing via the upload helper, unless disabled.
            if 'disable_upload' in msgdata:
                debug('> uploading disabled')
                remote_fname = 'disabled'
            else:
                remote_fname = ftp_upload_attchment(mlist, path)
            # The new URL of the document, used below when the parts are
            # rewritten.
            url = mlist.remote_http_base + remote_fname
            attachment['url'] = url
            reset_payload(part, 'removed', fname, url)
            detached.append(attachment)
            continue
        if mutipartre.search(ctype):
            # match multipart/*
            boundary = part.get_boundary()
            debug('>>> is multipart part %s, boundary: %s', ctype, boundary)
            continue
        if boundary is not None and part.get_boundary() == boundary:
            debug('same boundary skiped : %s', ctype)
            continue
        boundary = None
        debug('attachement : %s', ctype)
        debug('end of loop?? : %s', ctype)

    if not detached:
        # Nothing was detached, so there is nothing to rewrite.
        return msg

    # Rewrite the content.  The clip image is already base64 encoded, hence
    # the no-op encoder and the explicit transfer-encoding header.
    clip_cid = "clip.12345789"
    clip = MIMEImage(ATTACH_CLIP, 'png', _encoder=encoders.encode_noop)
    clip['Content-Transfer-Encoding'] = 'base64'
    clip.add_header('Content-ID', '<part1.%s>' % clip_cid)
    replace = {'CID_clip': 'part1.' + clip_cid}

    # Compose the plain-text and HTML link lists, one entry per attachment.
    txt_links = ''
    html_links = ''
    for att in detached:
        txt_links += make_link(att) + "\n"
        replace['FNAME_replace'] = att['orig']
        replace['URL_replace'] = att['url']
        replace['SIZE_replace'] = att['size']
        html_links += HTML_ATTACHMENT_CLIP_TPL % replace

    debug('================ start fix_msg() ==================')
    # Simple parameter bag passed down the recursive fix_msg() calls.
    d = {
        'footer_attach': txt_links,
        'html_footer_attach': html_links,
        'clip': clip,
        'lcset': lcset,
        'lcset_out': lcset_out,
        'do_txt': True,
        'do_html': True,
    }
    fix_msg(msg, d)
    return msg
def fix_msg(msg, data):
    """Recursively add the attachment footers to `msg`.

    Scan the message recursively to replace the text/html part by a
    multipart/related containing the original text/html and the clip png
    attachment.  A text/plain part with an X-Mailman-Part header (an
    attachment detected and moved in the first pass) is removed.

    `data` is the shared state of the recursion.  Keys read here:
    'footer_attach', 'html_footer_attach', 'clip', 'do_txt' and 'do_html'.
    The two flags are set to False once the matching footer has been added,
    so only the first text/plain and the first text/html part are changed.

    Returns the (possibly replaced) message part, or None when the part is
    to be dropped from its parent.
    """
    if msg.is_multipart():
        parts = msg.get_payload()
        # Remove the next-level parts, then process and reattach them.
        msg.set_payload(None)
        for p in parts:
            # recursive call
            r = fix_msg(p, data)
            if r is None:
                # Removed.  This has to be tested before r is used: in a
                # multipart/related parent the content-type comparison
                # below would otherwise call a method on None.
                continue
            # Don't embed related twice: splice the children in instead.
            if (msg.get_content_type() == 'multipart/related'
                    and r.get_content_type() == 'multipart/related'):
                for newp in r.get_payload():
                    msg.attach(newp)
            else:
                msg.attach(r)
        # finished
        return msg

    # Process the 'leaf' parts.
    ctype = msg.get_content_type()
    charset = msg.get_content_charset()
    # Used to write back the payload: utf-8 with QP as body encoding.
    c = Charset('utf-8')
    c.body_encoding = QP
    debug('ctype:%s charset:%s', ctype, charset)
    if ctype == 'text/plain':
        if msg['X-Mailman-Part']:
            # remove it!
            return None
        if data['do_txt']:
            # A normal txt part, add footer to plain text
            new_footer = TXT_ATTACHT_REPLACE
            new_footer += data['footer_attach']
            old_content = msg.get_payload(decode=True)
            debug('old_content:%s, new_footer:%s',
                  type(old_content), type(new_footer))
            # Drop the old headers before the payload is set with the new
            # charset.
            del msg['Content-type']
            del msg['content-transfer-encoding']
            msg.set_payload(old_content + new_footer, charset=c)
            debug('add txt footer')
            data['do_txt'] = False
            return msg
    elif ctype == 'text/html' and data['do_html']:
        # Build multipart/related for HTML; it will be canceled by the
        # parent recursive call if needed.
        related = MIMEMultipart('related')
        html_footer = HTML_ATTACHMENT_HOLDER % \
            {'HTML_HERE': data['html_footer_attach']}
        html_footer += '</body>'
        old_content = msg.get_payload(decode=True)
        # NOTE(review): html_footer is used as an re.sub replacement, so
        # backslash sequences in it are interpreted -- confirm that the
        # file names and URLs it contains cannot carry backslashes.
        new_content = re.sub(r'</body>', html_footer, old_content)
        if old_content != new_content:
            debug('add html footer')
        else:
            debug('no html footer added')
        del msg['content-transfer-encoding']
        msg.set_payload(new_content, charset=c)
        related.attach(msg)
        related.attach(data['clip'])
        data['do_html'] = False
        return related
    # unmodified
    return msg
# Line-folding and joining constants.
CRLFSPACE = '\r\n '
CRLF = '\r\n'
NL = '\n'
SPACE = ' '
USPACE = u' '
SPACE8 = ' ' * 8
EMPTYSTRING = ''
UEMPTYSTRING = u''

MAXLINELEN = 76

ENCODE = 1
DECODE = 2

USASCII = Charset('us-ascii')
UTF8 = Charset('utf-8')

# Match encoded-word strings in the form =?charset?q?Hello_World?=
#
# The pattern is compiled with re.VERBOSE, where a '#' starts a comment that
# runs to the end of the line.  Each piece therefore has to sit on a line of
# its own; on a single line the first comment would hide the rest.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)

# Matches a single comma or semicolon; the group makes re.split keep it.
pcre = re.compile('([,;])')