def test_japanese_codecs(self):
    """Exercise Header encoding and decoding with euc-jp and iso-8859-1.

    Covers a header assembled from several ``append()`` calls as well as a
    long Subject header that is split over two encoded words.
    """
    # NOTE(review): the line breaks inside the expected header values were
    # reconstructed from whitespace-collapsed source -- confirm against the
    # upstream test before relying on them.
    check = self.ndiffAssertEqual
    euc_jp = Charset("euc-jp")
    latin1 = Charset("iso-8859-1")
    jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa'
    ghello = 'Gr\xfc\xdf Gott!'

    header = Header("Hello World!")
    header.append(jhello, euc_jp)
    header.append(ghello, latin1)
    # BAW: This used to -- and maybe should -- fold the two iso-8859-1
    # chunks into a single encoded word.  However it doesn't violate the
    # standard to have them as two encoded chunks and maybe it's
    # reasonable <wink> for each .append() call to result in a separate
    # encoded word.
    expected_mixed = (
        'Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=\n'
        ' =?iso-8859-1?q?Gr=FC=DF?= =?iso-8859-1?q?_Gott!?=')
    check(header.encode(), expected_mixed)
    expected_chunks = [
        ('Hello World!', None),
        ('\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'),
        ('Gr\xfc\xdf Gott!', 'iso-8859-1'),
    ]
    check(decode_header(header.encode()), expected_chunks)

    long_text = 'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9'
    header = Header(long_text, euc_jp, header_name="Subject")
    # test a very long header
    encoded = header.encode()
    # TK: splitting point may differ by codec design and/or Header encoding
    expected_long = (
        '=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKGyhC?=\n'
        ' =?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?=')
    check(encoded, expected_long)
    # TK: full decode comparison
    check(header.__unicode__().encode('euc-jp'), long_text)
def test_japanese_codecs(self):
    """Verify Header round-trips for Japanese (euc-jp) and Latin-1 text.

    First a header built from three chunks is encoded and decoded again,
    then a long euc-jp Subject header is encoded and converted back.
    """
    # NOTE(review): newline placement in the triple-quoted expectations was
    # reconstructed from whitespace-collapsed source -- confirm upstream.
    same = self.ndiffAssertEqual
    japanese = Charset("euc-jp")
    german = Charset("iso-8859-1")
    greeting = Header("Hello World!")
    jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa'
    ghello = 'Gr\xfc\xdf Gott!'
    for chunk, charset in ((jhello, japanese), (ghello, german)):
        greeting.append(chunk, charset)
    # BAW: This used to -- and maybe should -- fold the two iso-8859-1
    # chunks into a single encoded word.  However it doesn't violate the
    # standard to have them as two encoded chunks and maybe it's
    # reasonable <wink> for each .append() call to result in a separate
    # encoded word.
    same(
        greeting.encode(),
        """\
Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=
 =?iso-8859-1?q?Gr=FC=DF?= =?iso-8859-1?q?_Gott!?=""")
    same(decode_header(greeting.encode()),
         [('Hello World!', None),
          ('\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'),
          ('Gr\xfc\xdf Gott!', 'iso-8859-1')])

    subject_text = 'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9'
    subject = Header(subject_text, japanese, header_name="Subject")
    # test a very long header
    folded = subject.encode()
    # TK: splitting point may differ by codec design and/or Header encoding
    same(
        folded,
        """\
=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKGyhC?=
 =?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?=""")
    # TK: full decode comparison
    same(subject.__unicode__().encode('euc-jp'), subject_text)
def forbid_multi_line_headers(name, val, encoding):
    """Forbids multi-line headers, to prevent header injection.

    ``val`` is converted with ``force_unicode`` and rejected with
    ``BadHeaderError`` if it contains a CR or LF.  Values that are not pure
    ASCII are wrapped in ``Header`` objects using ``encoding`` (falling back
    to ``settings.DEFAULT_CHARSET``); To/From/Cc values are encoded address
    by address.  Returns the ``(name, val)`` pair to store.
    """
    charset = encoding or settings.DEFAULT_CHARSET
    text = force_unicode(val)
    if '\n' in text or '\r' in text:
        raise BadHeaderError("Header values can't contain newlines (got %r for header %r)" % (text, name))

    try:
        ascii_value = text.encode('ascii')
    except UnicodeEncodeError:
        if name.lower() not in ('to', 'from', 'cc'):
            # Any other header is encoded as a single unit.
            return name, Header(text.encode(charset), charset)
        formatted = []
        for display, address in getaddresses((text,)):
            display = str(Header(display.encode(charset), charset))
            try:
                address = address.encode('ascii')
            except UnicodeEncodeError:  # IDN
                address = str(Header(address.encode(charset), charset))
            formatted.append(formataddr((display, address)))
        return name, ', '.join(formatted)

    # Pure ASCII: only the subject is wrapped in a Header object.
    if name.lower() == 'subject':
        return name, Header(ascii_value)
    return name, ascii_value
def encode_addresses(addresses, header_name=None):
    """Build an address header value from plain and (name, address) entries.

    Unicode address headers are automatically encoded by email.Header, but
    not correctly. The correct way is to put the textual name inside quotes
    and the address inside brackets:

    To: "=?utf-8?b?encoded" <recipient@domain>

    Each entry in ``addresses`` may be a tuple of (name, address) or just an
    address string.  Names that are pure ASCII are emitted unquoted; any
    other name is RFC 2047 encoded as utf-8 and quoted.

    Returns a tuple of (header, addrlist) representing the encoded header
    text and the list of plain text addresses.
    """
    header = []
    addrs = []
    for addr in addresses:
        if isinstance(addr, tuple):
            name, addr = addr
            try:
                name = name.encode('ascii')
            except UnicodeError:
                # Only encoding/decoding failures mean "needs RFC 2047";
                # anything else (KeyboardInterrupt, programming errors) must
                # not be silently swallowed here.
                encoded = Header(name, charset="utf-8",
                                 header_name=header_name)
                header.append('"%s" <%s>' % (encoded.encode(), addr))
            else:
                header.append('%s <%s>' % (name, addr))
        else:
            header.append(addr)
        addrs.append(addr)
    return (", ".join(header), addrs)
def __setitem__(self, name, val):
    """Forbids multi-line headers, to prevent header injection.

    Raises ``BadHeaderError`` when ``val`` contains a CR or LF.  A Subject
    header (matched case-insensitively, since header field names are not
    case-sensitive) is encoded with ``settings.MAIL_CHARSET``, replacing
    characters that charset cannot represent, and wrapped in a ``Header``.
    The value is then stored through ``MIMEText.__setitem__``.
    """
    if "\n" in val or "\r" in val:
        raise BadHeaderError("Header values can't contain newlines (got %r for header %r)" % (val, name))
    if name.lower() == "subject":
        charset = settings.MAIL_CHARSET
        val = Header(val.encode(charset, "replace"), charset)
    MIMEText.__setitem__(self, name, val)
def forbid_multi_line_headers(name, val, encoding):
    """Forbids multi-line headers, to prevent header injection.

    The value is coerced with ``force_unicode``; a CR or LF in it raises
    ``BadHeaderError``.  ASCII-only values are returned encoded as ASCII
    (wrapped in ``Header`` for the subject).  Other values are encoded with
    ``encoding`` or ``settings.DEFAULT_CHARSET``: per address for
    To/From/Cc, as one ``Header`` otherwise.  Returns ``(name, val)``.
    """
    encoding = encoding or settings.DEFAULT_CHARSET
    val = force_unicode(val)
    if '\n' in val or '\r' in val:
        raise BadHeaderError("Header values can't contain newlines (got %r for header %r)" % (val, name))

    try:
        val = val.encode('ascii')
        is_ascii = True
    except UnicodeEncodeError:
        is_ascii = False

    lowered = name.lower()
    if is_ascii:
        if lowered == 'subject':
            val = Header(val)
    elif lowered in ('to', 'from', 'cc'):
        pieces = []
        for nm, addr in getaddresses((val,)):
            encoded_name = str(Header(nm.encode(encoding), encoding))
            try:
                encoded_addr = addr.encode('ascii')
            except UnicodeEncodeError:  # IDN
                encoded_addr = str(Header(addr.encode(encoding), encoding))
            pieces.append(formataddr((encoded_name, encoded_addr)))
        val = ', '.join(pieces)
    else:
        val = Header(val.encode(encoding), encoding)
    return name, val
def buildmail(charset, fromaddr, toaddrs, subject, message):
    """Assemble a plain-text MIME message.

    ``message`` and ``subject`` are encoded to ``charset`` (unencodable
    characters are replaced); the subject is wrapped in a ``Header``.
    ``toaddrs`` is an iterable of address strings joined with ", " for the
    To header.  Returns the resulting ``MIMEText`` object.
    """
    body = message.encode(charset, 'replace')
    subject_header = Header(subject.encode(charset, 'replace'), charset)
    recipients = ', '.join(toaddrs)

    mail = MIMEText(body, 'plain', charset)
    for field, content in (('Subject', subject_header),
                           ('From', fromaddr),
                           ('To', recipients)):
        mail[field] = content
    return mail
def add_header(self, key, value, immediate=False):
    """Add a header to the message.

    By default the header is only recorded in ``self.addheaders``; the
    original documentation states such headers are added when the message
    is re-injected back to postfix.

    If ``immediate`` is True the header line is prepended to the message
    source right away.  Only set this to True if a header must be visible
    to later plugins (eg. for spamassassin rules), otherwise leave it as
    False, which is faster.
    """
    if not immediate:
        self.addheaders[key] = value
        return

    # Local import keeps this block self-contained.
    import sys

    # Only byte strings need decoding: calling unicode() with ``errors`` on
    # text that is already unicode raises TypeError.  Undecodable bytes are
    # dropped.
    # is ignore the right thing to do here?
    if isinstance(value, bytes):
        value = value.decode(sys.getdefaultencoding(), 'ignore')
    hdr = Header(value, header_name=key, continuation_ws=' ')
    hdrline = "%s: %s\n" % (key, hdr.encode())
    self.set_source(hdrline + self.getSource())
def add_header(self, key, value, immediate=False):
    """Add a header to the message.

    By default the header is only recorded in ``self.addheaders``; the
    original documentation states such headers are added when the message
    is re-injected back to postfix.

    If ``immediate`` is True the header line is prepended to the message
    source right away.  Only set this to True if a header must be visible
    to later plugins (eg. for spamassassin rules), otherwise leave it as
    False, which is faster.
    """
    if not immediate:
        self.addheaders[key] = value
        return

    # Local import keeps this block self-contained.
    import sys

    # Only byte strings need decoding: calling unicode() with ``errors`` on
    # text that is already unicode raises TypeError.  Undecodable bytes are
    # dropped.
    # is ignore the right thing to do here?
    if isinstance(value, bytes):
        value = value.decode(sys.getdefaultencoding(), 'ignore')
    hdr = Header(value, header_name=key, continuation_ws=' ')
    hdrline = "%s: %s\n" % (key, hdr.encode())
    self.set_source(hdrline + self.get_source())
def get_addr_line(name, addr):
    '''Get the address line

:param str name: The display-name in the address.
:param str addr: The actual email address.
:returns: The display name, encoded as a UTF8 ``Header``, combined with
          ``addr`` by ``formataddr``.
:rtype: str'''
    # --=mpj17=-- In Python 3 just using formataddr, sans the Header,
    # will work. This method should be removed.
    display_name = Header(to_unicode_or_bust(name), UTF8).encode()
    return formataddr((display_name, addr))
def buildmsgsource(suspect):
    """Build the message source with fuglu headers prepended.

    Every entry of ``suspect.addheaders`` is rendered as a ``key: value``
    line (folded by ``Header`` with a single space as continuation
    whitespace) and the lines are placed in front of the text returned by
    ``suspect.getSource()``.  Returns the combined text.
    """
    # Local import keeps this block self-contained.
    import sys

    # we must prepend headers manually as we can't set a header order in
    # email objects
    origmsgtxt = suspect.getSource()
    default_codec = sys.getdefaultencoding()
    lines = []
    for key, val in suspect.addheaders.items():
        # Only byte strings need decoding: calling unicode() with ``errors``
        # on text that is already unicode raises TypeError.
        # is ignore the right thing to do here?
        if isinstance(val, bytes):
            val = val.decode(default_codec, 'ignore')
        hdr = Header(val, header_name=key, continuation_ws=' ')
        lines.append("%s: %s\n" % (key, hdr.encode()))
    return "".join(lines) + origmsgtxt
def buildmsgsource(suspect):
    """Build the message source with fuglu headers prepended.

    Every entry of ``suspect.addheaders`` is rendered as a ``key: value``
    line (folded by ``Header`` with a single space as continuation
    whitespace) and the lines are placed in front of the text returned by
    ``suspect.get_source()``.  Returns the combined text.
    """
    # Local import keeps this block self-contained.
    import sys

    # we must prepend headers manually as we can't set a header order in
    # email objects
    origmsgtxt = suspect.get_source()
    default_codec = sys.getdefaultencoding()
    lines = []
    for key, val in suspect.addheaders.items():
        # Only byte strings need decoding: calling unicode() with ``errors``
        # on text that is already unicode raises TypeError.
        # is ignore the right thing to do here?
        if isinstance(val, bytes):
            val = val.decode(default_codec, 'ignore')
        hdr = Header(val, header_name=key, continuation_ws=' ')
        lines.append("%s: %s\n" % (key, hdr.encode()))
    return "".join(lines) + origmsgtxt
def _write_message(msg, path):
    """Write ``msg.as_string(0)`` (no Unix From line) to the file at *path*."""
    # The context manager closes the file even if serialization or the
    # write itself raises.
    with open(path, 'wb') as fp:
        fp.write(msg.as_string(0))


def main(args):
    """Generate three sample messages, one per leading entry of *args*.

    The first three items of ``args`` are popped (the list is mutated) and
    used, in order, as output paths for:

    1. a single-part iso-2022-jp message,
    2. a multipart message with a utf-8 text part and an HTML part,
    3. a multipart message nesting another multipart and an attached
       iso-2022-jp message, with a deliberately odd From header.

    Raises ``IndexError`` if ``args`` holds fewer than three paths.
    """
    msg1 = Message()
    msg1.set_charset('iso-2022-jp')
    msg1['From'] = Header(u'Yusuke Shinyama <*****@*****.**>', 'iso-2022-jp')
    msg1['To'] = Header(u'きょうから明日です <today@tomorrow>', 'iso-2022-jp')
    msg1['Subject'] = Header(u'ムーミン谷のみなさんへ', 'iso-2022-jp')
    msg1['Date'] = 'Thu, 31 Aug 2004 03:06:09 +0900'
    msg1.set_payload(u'その逆だ!'.encode('iso-2022-jp'), 'iso-2022-jp')
    _write_message(msg1, args.pop(0))

    msg2 = MIMEMultipart()
    msg2.set_charset('utf-8')
    msg2['From'] = Header(u'えうすけ <*****@*****.**>', 'iso-2022-jp')
    msg2['To'] = Header(u'だれでも <any@one>', 'utf-8')
    msg2['Subject'] = Header(u'何を見てるんだい?', 'iso-2022-jp')
    msg2['Date'] = 'Thu, 29 Feb 2004 19:23:34 +0500'
    text1 = MIMEText(u'ああそうか、\nこれは夢なんだ。'.encode('utf-8'), 'plain', 'utf-8')
    text2 = MIMEText(u'<html><body>\n<strong>HEY!</strong>\n<p>do you wanna feel unconfortably energetic?\n</body></html>', 'html')
    # The attachment file name is itself an encoded iso-2022-jp header.
    h = Header(u'ふうばあ ばず', 'iso-2022-jp')
    text2.add_header('Content-Disposition', 'attachment', filename=h.encode())
    msg2.set_payload([text1, text2])
    _write_message(msg2, args.pop(0))

    msg3 = MIMEMultipart()
    msg3['From'] = '=?iso-2022-jp?b?Gy?= \xff\xaa\x88'
    msg3['Subject'] = 'huh?'
    msg3['Date'] = 'Tue, 25 Nov 2008 01:00:09 +0900'
    parts = MIMEMultipart()
    parts.set_payload([MIMEText('part1'), MIMEText('part2')])
    msg4 = Message()
    msg4.set_charset('iso-2022-jp')
    msg4['From'] = Header(u'john doe <*****@*****.**>', 'iso-2022-jp')
    msg4['To'] = Header(u'どこだって? <where@where>', 'iso-2022-jp')
    msg4['Subject'] = Header(u'その先の日本へ', 'iso-2022-jp')
    msg4['Date'] = 'Sun, 31 Aug 2008 12:20:33 +0900'
    msg4.set_payload(u'ししかばう゛ー'.encode('iso-2022-jp'), 'iso-2022-jp')
    msg3.set_payload([parts, MIMEMessage(msg4)])
    _write_message(msg3, args.pop(0))
def _encodedHeader(value, encoding):
    """
    Given a value and an encoding, return it encoded as per rfc2047 for
    use in a MIME message header.

    >>> from Products.listen.content.mailboxer_list import MailBoxerMailingList

    If the input can be converted to ascii, it will be, regardless of
    the encoding argument:

    >>> MailBoxerMailingList._encodedHeader('blah', 'utf8')
    'blah'

    If it can be encoded to the target encoding, it will be, and
    then encoded as per rfc2047:

    >>> input = u'\xbfhmm?'
    >>> MailBoxerMailingList._encodedHeader(input, 'utf8')
    '=?utf8?b?wr9obW0/?='
    >>> MailBoxerMailingList._encodedHeader(input.encode('utf8'), 'utf8')
    '=?utf8?b?wr9obW0/?='
    >>> raw = 'a string \345\276\267\345\233\275'
    >>> MailBoxerMailingList._encodedHeader(raw, 'utf8')
    '=?utf8?b?YSBzdHJpbmcg5b635Zu9?='

    All other cases will raise an exception. Typically this means
    a raw byte string in an incompatible encoding:

    >>> MailBoxerMailingList._encodedHeader(input.encode('latin1'), 'utf8')
    Traceback (most recent call last):
    ...
    UnicodeDecodeError: 'utf8' codec can't decode byte 0xbf in position 0: unexpected code byte
    """
    # 1. Plain ASCII passes through without any RFC 2047 wrapping.
    try:
        return value.encode('ascii')
    except (UnicodeEncodeError, UnicodeDecodeError):
        pass

    # 2. Encode the value to the target encoding, then wrap it.
    try:
        return Header(value.encode(encoding), encoding).encode()
    except UnicodeDecodeError:
        pass

    # 3. Last resort: hand the value to Header unchanged.
    try:
        return Header(value, encoding).encode()
    except UnicodeDecodeError:
        logger.error("Could not guess encoding of raw bytestring %r, there is probably a bug in the code that created this header." % value)
        raise
def forbid_multi_line_headers(name, val, encoding):
    """Forbids multi-line headers, to prevent header injection.

    ``val`` is converted with ``force_unicode(val, encoding)`` (``encoding``
    defaults to "utf-8") and rejected with ``BadHeaderError`` when it holds
    a CR or LF.  Non-ASCII To/From/Cc values get each display name encoded
    as a ``Header``; other non-ASCII values become a single ``Header``.
    An ASCII subject is wrapped in ``Header`` as well.  Returns
    ``(name, val)``.
    """
    charset = encoding or "utf-8"
    text = force_unicode(val, charset)
    if "\n" in text or "\r" in text:
        raise BadHeaderError("Header values can't contain newlines (got %r for header %r)" % (text, name))

    try:
        plain = text.encode("ascii")
    except UnicodeEncodeError:
        if name.lower() not in ("to", "from", "cc"):
            return name, Header(text.encode(charset), charset)
        entries = []
        for display, address in getaddresses((text,)):
            display = str(Header(display.encode(charset), charset))
            entries.append(formataddr((display, str(address))))
        return name, ", ".join(entries)

    if name.lower() == "subject":
        plain = Header(plain)
    return name, plain
def _process_utf8(self,kw):
    """Render the template and collect the message, values and headers.

    Returns a ``(msg, values, headers)`` tuple: the rendered body as a
    ``BaseMailTemplate.MIMEText`` object, a dict of the raw
    mfrom/mto/mcc/mbcc/subject values that were supplied, and the headers
    as a list of ``(name, value)`` pairs sorted by name.  Raises
    ``TypeError`` when ``mfrom`` or ``mto`` is missing.
    """
    # NOTE(review): block nesting was reconstructed from whitespace-collapsed
    # source; in particular ``headers[header] = value`` is assumed to run
    # only for values that are not None -- confirm against the original.

    # sort out what encoding we're going to use
    fallback_encoding = self.getProperty('encoding',
                                         BaseMailTemplate.default_encoding)
    encoding = kw.get('encoding', fallback_encoding)
    text = self.__class__.__bases__[1].__call__(self, **kw)
    # ZPT adds newline at the end, but it breaks backward compatibility.
    # So I remove it.
    if text and text[-1] == '\n':
        text = text[:-1]
    if not self.html() and isinstance(text, unicode):
        text = text.encode(encoding, 'replace')
    # now turn the result into a MIMEText object
    subtype = self.content_type.split('/')[1]
    msg = BaseMailTemplate.MIMEText(text.replace('\r', ''), subtype, encoding)

    # sort out what headers and addresses we're going to use
    headers = {}
    values = {}
    # headers from the headers property
    for line in getattr(self, 'headers', ()):
        field, content = line.split(':', 1)
        headers[field] = content
    # headers from the headers parameter
    headers_param = kw.get('headers', {})
    headers.update(headers_param)

    # values and some specific headers
    for key, header in (('mfrom', 'From'),
                        ('mto', 'To'),
                        ('mcc', 'Cc'),
                        ('mbcc', 'Bcc'),
                        ('subject', 'Subject')):
        # Lookup precedence: keyword argument, headers parameter, attribute
        # on self, headers property.
        value = headers.get(header)
        value = getattr(self, key, value)
        value = headers_param.get(header, value)
        value = kw.get(key, value)
        if value is None:
            continue
        values[key] = value

        # turn some sequences into comma-separated strings
        if isinstance(value, (tuple, list)):
            value = ', '.join(value)
        # make sure we have no unicode headers
        if isinstance(value, unicode):
            value = value.encode(encoding)

        if key == 'subject':
            try:
                # Try to keep header non encoded
                value = Header(value.encode("ascii"))
            except UnicodeDecodeError:
                value = Header(value, "UTF-8")
        else:
            dest_list = []
            for name, email in getaddresses([value]):
                try:
                    name = Header(name.encode("ascii"))
                except UnicodeDecodeError:
                    name = Header(name, "UTF-8")
                dest_list.append(formataddr((name.encode(), email)))
            value = ", ".join(dest_list)

        headers[header] = value

    # check required values have been supplied
    errors = [param for param in ('mfrom', 'mto') if not values.get(param)]
    if errors:
        raise TypeError(
            'The following parameters were required by not specified: '+(
            ', '.join(errors)
            ))

    # add date header
    headers['Date'] = BaseMailTemplate.DateTime().rfc822()
    # add message-id header
    headers['Message-ID'] = make_msgid()
    # turn headers into an ordered list for predictable header order
    return msg, values, [(key, headers[key]) for key in sorted(headers.keys())]
def _process(self, kw):
    """Render the template and collect the message, values and headers.

    Returns a ``(msg, values, headers)`` tuple: the rendered body as a
    ``MIMEText`` object, a dict of the raw mfrom/mto/mcc/mbcc/subject
    values that were supplied, and the headers as a list of
    ``(name, value)`` pairs sorted by name.  Raises ``TypeError`` when
    ``mfrom`` or ``mto`` is missing.
    """
    # NOTE(review): block nesting was reconstructed from whitespace-collapsed
    # source; in particular ``headers[header] = value`` is assumed to run
    # only for values that are not None -- confirm against the original.

    # sort out what encoding we're going to use
    fallback_encoding = self.getProperty('encoding', default_encoding)
    encoding = kw.get('encoding', fallback_encoding)
    text = self.__class__.__bases__[1].__call__(self, **kw)
    # ZPT adds newline at the end, but it breaks backward compatibility.
    # So I remove it.
    if text.endswith('\n'):
        text = text[:-1]
    if not self.html() and isinstance(text, unicode):
        text = text.encode(encoding, 'replace')
    # now turn the result into a MIMEText object
    subtype = self.content_type.split('/')[1]
    msg = MIMEText(text.replace('\r', ''), subtype, encoding)

    # sort out what headers and addresses we're going to use
    headers = {}
    values = {}
    # headers from the headers property
    for line in getattr(self, 'headers', ()):
        field, content = line.split(':', 1)
        headers[field] = content
    # headers from the headers parameter
    headers_param = kw.get('headers', {})
    headers.update(headers_param)

    # values and some specific headers
    for key, header in (('mfrom', 'From'), ('mto', 'To'), ('mcc', 'Cc'),
                        ('mbcc', 'Bcc'), ('subject', 'Subject')):
        # Lookup precedence: keyword argument, headers parameter, attribute
        # on self, headers property.
        value = headers.get(header)
        value = getattr(self, key, value)
        value = headers_param.get(header, value)
        value = kw.get(key, value)
        if value is None:
            continue
        values[key] = value

        if key == 'subject':
            try:
                # Try to keep header non encoded
                value = Header(value)
            except UnicodeDecodeError:
                value = Header(value, "UTF-8")
        else:
            # A single string is treated as one field value; any other
            # value is passed to getaddresses() as the sequence it is.
            if isinstance(value, basestring):
                field_values = (value, )
            else:
                field_values = value
            dest_list = []
            for name, email in getaddresses(field_values):
                try:
                    name = Header(name)
                except UnicodeDecodeError:
                    name = Header(name, "UTF-8")
                dest_list.append(formataddr((name.encode(), email)))
            value = ", ".join(dest_list)

        headers[header] = value

    # check required values have been supplied
    errors = [param for param in ('mfrom', 'mto') if not values.get(param)]
    if errors:
        raise TypeError(
            'The following parameters were required by not specified: ' +
            (', '.join(errors)))

    # add date header
    headers['Date'] = DateTime().rfc822()
    # do not let the MTA to generate the Message-ID:
    # we want to have it stored in ERP5, for mail threading
    headers['Message-ID'] = make_msgid()
    # turn headers into an ordered list for predictable header order
    return msg, values, [(key, headers[key]) for key in sorted(headers.keys())]