def test_japanese_codecs(self):
        # Exercises Header with EUC-JP and ISO-8859-1 chunks: the encoded
        # form, the decode_header() view of it, and folding of a long Subject.
        check = self.ndiffAssertEqual
        euc_jp = Charset("euc-jp")
        latin1 = Charset("iso-8859-1")
        header = Header("Hello World!")
        jp_greeting = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa'
        de_greeting = 'Gr\xfc\xdf Gott!'
        header.append(jp_greeting, euc_jp)
        header.append(de_greeting, latin1)
        # BAW: This used to -- and maybe should -- fold the two iso-8859-1
        # chunks into a single encoded word.  However it doesn't violate the
        # standard to have them as two encoded chunks and maybe it's
        # reasonable <wink> for each .append() call to result in a separate
        # encoded word.
        expected_mixed = """\
Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=
 =?iso-8859-1?q?Gr=FC=DF?= =?iso-8859-1?q?_Gott!?="""
        check(header.encode(), expected_mixed)
        expected_chunks = [
            ('Hello World!', None),
            ('\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'),
            ('Gr\xfc\xdf Gott!', 'iso-8859-1'),
        ]
        check(decode_header(header.encode()), expected_chunks)
        # A long EUC-JP subject that has to be split over two encoded words.
        subject = 'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9'
        header = Header(subject, euc_jp, header_name="Subject")
        folded = header.encode()
        # TK: splitting point may differ by codec design and/or Header encoding
        expected_folded = """\
=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKGyhC?=
 =?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?="""
        check(folded, expected_folded)
        # TK: full decode comparison
        check(header.__unicode__().encode('euc-jp'), subject)
    def test_japanese_codecs(self):
        eq = self.ndiffAssertEqual
        j = Charset("euc-jp")
        g = Charset("iso-8859-1")
        h = Header("Hello World!")
        jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa'
        ghello = 'Gr\xfc\xdf Gott!'
        h.append(jhello, j)
        h.append(ghello, g)
        # BAW: This used to -- and maybe should -- fold the two iso-8859-1
        # chunks into a single encoded word.  However it doesn't violate the
        # standard to have them as two encoded chunks and maybe it's
        # reasonable <wink> for each .append() call to result in a separate
        # encoded word.
        eq(
            h.encode(), """\
Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=
 =?iso-8859-1?q?Gr=FC=DF?= =?iso-8859-1?q?_Gott!?=""")
        eq(decode_header(h.encode()),
           [('Hello World!', None),
            ('\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'),
            ('Gr\xfc\xdf Gott!', 'iso-8859-1')])
        long = 'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9'
        h = Header(long, j, header_name="Subject")
        # test a very long header
        enc = h.encode()
        # TK: splitting point may differ by codec design and/or Header encoding
        eq(
            enc, """\
=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKGyhC?=
 =?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?=""")
        # TK: full decode comparison
        eq(h.__unicode__().encode('euc-jp'), long)
Exemple #3
0
def forbid_multi_line_headers(name, val, encoding):
    """Reject header values containing newlines and encode the rest.

    ``val`` is converted with ``force_unicode``.  A value containing
    ``\\n`` or ``\\r`` raises ``BadHeaderError`` (header-injection guard).

    Returns ``(name, value)`` where ``value`` is:

    * the ASCII-encoded string when the value is pure ASCII (wrapped in a
      ``Header`` for the Subject header);
    * for non-ASCII To/From/Cc values, a comma-separated string in which
      every display name -- and every non-ASCII address -- is run through
      ``Header`` with ``encoding``;
    * for any other non-ASCII header, a ``Header`` built with ``encoding``.

    ``encoding`` falls back to ``settings.DEFAULT_CHARSET`` when falsy.
    """
    charset = encoding or settings.DEFAULT_CHARSET
    val = force_unicode(val)
    if '\n' in val or '\r' in val:
        raise BadHeaderError(
            "Header values can't contain newlines (got %r for header %r)" %
            (val, name))

    try:
        ascii_val = val.encode('ascii')
    except UnicodeEncodeError:
        ascii_val = None

    header_key = name.lower()

    # Pure-ASCII value: only the Subject gets wrapped in a Header object.
    if ascii_val is not None:
        if header_key == 'subject':
            return name, Header(ascii_val)
        return name, ascii_val

    # Non-ASCII value on a header that is not an address list.
    if header_key not in ('to', 'from', 'cc'):
        return name, Header(val.encode(charset), charset)

    # Non-ASCII address list: encode each display name / address separately.
    encoded = []
    for display, mailbox in getaddresses((val, )):
        display = str(Header(display.encode(charset), charset))
        try:
            mailbox = mailbox.encode('ascii')
        except UnicodeEncodeError:  # IDN
            mailbox = str(Header(mailbox.encode(charset), charset))
        encoded.append(formataddr((display, mailbox)))
    return name, ', '.join(encoded)
    def encode_addresses(addresses, header_name = None):
        """
        Unicode address headers are automatically encoded by
        email.Header, but not correctly. The correct way is to put the
        textual name inside quotes and the address inside brackets:

        To: "=?utf-8?b?encoded" <recipient@domain>

        Each address in addresses may be a tuple of (name, address) or
        just an address. Returns a tuple of (header, addrlist)
        representing the encoded header text and the list of plain
        text addresses.

        Only a failed ASCII conversion of the name (UnicodeError) selects
        the encoded form; any other error raised while handling a name is
        propagated instead of being silently swallowed.
        """

        header = []
        addrs = []

        for entry in addresses:
            if isinstance(entry, tuple):
                (name, addr) = entry
                try:
                    # Keep the try body to the single call that signals
                    # "this name is not plain ASCII".
                    name = name.encode('ascii')
                except UnicodeError:
                    # Covers both UnicodeEncodeError (unicode input) and
                    # UnicodeDecodeError (non-ASCII byte string input).
                    h = Header(name, charset = "utf-8", header_name = header_name)
                    header.append('"%s" <%s>' % (h.encode(), addr))
                else:
                    header.append('%s <%s>' % (name, addr))
            else:
                addr = entry
                header.append(addr)
            addrs.append(addr)

        return (", ".join(header), addrs)
Exemple #5
0
 def __setitem__(self, name, val):
     """Set header ``name`` to ``val``, forbidding multi-line values.

     Raises ``BadHeaderError`` when ``val`` contains a newline or carriage
     return, to prevent header injection.  The Subject header (matched
     case-insensitively, since header field names are case-insensitive)
     is encoded with ``settings.MAIL_CHARSET`` and wrapped in a ``Header``;
     every other header is passed through to ``MIMEText.__setitem__``
     unchanged.
     """
     if "\n" in val or "\r" in val:
         # Call form of raise: valid on Python 2 and 3 alike.
         raise BadHeaderError("Header values can't contain newlines (got %r for header %r)" % (val, name))
     if name.lower() == "subject":
         val = Header(val.encode(settings.MAIL_CHARSET, "replace"), settings.MAIL_CHARSET)
     MIMEText.__setitem__(self, name, val)
def forbid_multi_line_headers(name, val, encoding):
    """Guard against header injection and encode non-ASCII header values.

    Raises ``BadHeaderError`` if the unicode form of ``val`` contains a
    line break.  Otherwise returns ``(name, value)``; ``value`` is an ASCII
    string, a ``Header`` object, or -- for non-ASCII To/From/Cc headers --
    a comma-joined string of individually encoded addresses.
    ``encoding`` defaults to ``settings.DEFAULT_CHARSET`` when falsy.
    """
    encoding = encoding or settings.DEFAULT_CHARSET

    def _as_header(text):
        # Encode ``text`` to the target charset and wrap it in a Header.
        return Header(text.encode(encoding), encoding)

    val = force_unicode(val)
    if '\n' in val or '\r' in val:
        raise BadHeaderError("Header values can't contain newlines (got %r for header %r)" % (val, name))

    lowered = name.lower()
    try:
        val = val.encode('ascii')
    except UnicodeEncodeError:
        if lowered in ('to', 'from', 'cc'):
            pieces = []
            for display_name, address in getaddresses((val,)):
                display_name = str(_as_header(display_name))
                try:
                    address = address.encode('ascii')
                except UnicodeEncodeError:  # IDN
                    address = str(_as_header(address))
                pieces.append(formataddr((display_name, address)))
            val = ', '.join(pieces)
        else:
            val = _as_header(val)
    else:
        # ASCII-only values stay plain strings, except for the Subject.
        if lowered == 'subject':
            val = Header(val)
    return name, val
Exemple #7
0
def buildmail(charset, fromaddr, toaddrs, subject, message):
    """Assemble a plain-text ``MIMEText`` message.

    ``message`` and ``subject`` are encoded to ``charset`` (unencodable
    characters are replaced); the subject is wrapped in a ``Header``.
    ``toaddrs`` is an iterable of address strings joined with ", " for the
    To header.  Returns the populated ``MIMEText`` object.
    """
    body = message.encode(charset, 'replace')
    subject_header = Header(subject.encode(charset, 'replace'), charset)
    recipients = ', '.join(toaddrs)

    mail = MIMEText(body, 'plain', charset)
    # Headers are assigned in this order: Subject, From, To.
    for field, content in (('Subject', subject_header),
                           ('From', fromaddr),
                           ('To', recipients)):
        mail[field] = content
    return mail
Exemple #8
0
 def add_header(self,key,value,immediate=False):
     """Add a header to the message.

     With ``immediate=False`` (the default) the header is only recorded in
     ``self.addheaders``.  With ``immediate=True`` the encoded header line
     is prepended to the message source right away via ``set_source``;
     per the original note, use that only when the header must be visible
     to later plugins, since the deferred path is faster.

     ``value`` may be a byte string or a unicode string.  Byte strings are
     decoded with ``errors='ignore'``; unicode values are used as-is,
     because ``unicode(u'...', errors=...)`` raises TypeError.
     """
     if not immediate:
         self.addheaders[key]=value
         return

     if isinstance(value, unicode):
         val=value
     else:
         val=unicode(value,errors='ignore')  # is ignore the right thing to do here?
     hdr=Header(val, header_name=key, continuation_ws=' ')
     hdrline="%s: %s\n"%(key,hdr.encode())
     src=hdrline+self.getSource()
     self.set_source(src)
Exemple #9
0
 def add_header(self, key, value, immediate=False):
     """Add a header to the message.

     By default the header is stored in ``self.addheaders``.  If
     ``immediate`` is true, the encoded header line is instead prepended to
     the current message source and written back with ``set_source``; per
     the original note, do this only when later plugins must see the
     header, as the deferred path is faster.

     ``value`` may be a byte string (decoded with ``errors='ignore'``) or
     an already-decoded unicode string, which is used unchanged because
     ``unicode()`` refuses to decode a unicode object.
     """
     if not immediate:
         self.addheaders[key] = value
         return

     if isinstance(value, unicode):
         val = value
     else:
         # is ignore the right thing to do here?
         val = unicode(value, errors='ignore')
     hdr = Header(val, header_name=key, continuation_ws=' ')
     hdrline = "%s: %s\n" % (key, hdr.encode())
     self.set_source(hdrline + self.get_source())
Exemple #10
0
    def get_addr_line(name, addr):
        '''Build an address header value from a display name and an address.

        :param str name: The display-name in the address.
        :param str addr: The actual email address.
        :returns: The result of ``formataddr`` on the encoded name and
                  the address.
        :rtype: str'''
        # --=mpj17=-- In Python 3 just using formataddr, sans the Header,
        #  will work. This method should be removed.
        encoded_name = Header(to_unicode_or_bust(name), UTF8).encode()
        return formataddr((encoded_name, addr))
    def get_addr_line(name, addr):
        '''Return a formatted address line for ``name`` and ``addr``.

        The display name is converted with ``to_unicode_or_bust``, wrapped
        in a ``Header`` using the ``UTF8`` charset and encoded before being
        combined with the address by ``formataddr``.

        :param str name: The display-name in the address.
        :param str addr: The actual email address.
        :returns: The formatted mail header value.
        :rtype: str'''
        # --=mpj17=-- In Python 3 just using formataddr, sans the Header,
        #  will work. This method should be removed.
        display = to_unicode_or_bust(name)
        pair = (Header(display, UTF8).encode(), addr)
        return formataddr(pair)
def buildmsgsource(suspect):
    """Build the message source with fuglu headers prepended.

    Reads the original text from ``suspect.getSource()`` and prepends one
    encoded ``Key: value`` line per entry of ``suspect.addheaders``.
    Returns the combined text.

    Header values may be byte strings (decoded with ``errors='ignore'``)
    or unicode strings, which are used as-is because
    ``unicode(u'...', errors=...)`` raises TypeError.
    """
    #we must prepend headers manually as we can't set a header order in email objects
    origmsgtxt=suspect.getSource()
    headerlines=[]

    for key in suspect.addheaders:
        raw=suspect.addheaders[key]
        if isinstance(raw, unicode):
            val=raw
        else:
            val=unicode(raw,errors='ignore')  # is ignore the right thing to do here?
        hdr=Header(val, header_name=key, continuation_ws=' ')
        headerlines.append("%s: %s\n"%(key,hdr.encode()))

    return "".join(headerlines)+origmsgtxt
Exemple #13
0
def buildmsgsource(suspect):
    """Build the message source with fuglu headers prepended.

    The original text comes from ``suspect.get_source()``; every entry of
    ``suspect.addheaders`` becomes an encoded ``Key: value`` line placed
    in front of it.  Returns the resulting text.

    Byte-string header values are decoded with ``errors='ignore'``;
    values that are already unicode are passed through untouched, since
    ``unicode()`` cannot decode a unicode object (TypeError).
    """
    # we must prepend headers manually as we can't set a header order in email
    # objects
    origmsgtxt = suspect.get_source()
    headerlines = []

    for key, raw in suspect.addheaders.items():
        if isinstance(raw, unicode):
            val = raw
        else:
            # is ignore the right thing to do here?
            val = unicode(raw, errors='ignore')
        hdr = Header(val, header_name=key, continuation_ws=' ')
        headerlines.append("%s: %s\n" % (key, hdr.encode()))

    return "".join(headerlines) + origmsgtxt
Exemple #14
0
def main(args):
  """Write three sample MIME messages to files.

  ``args`` is a mutable list; one path is popped from its front for each
  of the three messages, in order:

  1. a single-part iso-2022-jp message,
  2. a multipart message with a utf-8 text part and an HTML attachment
     whose filename is an encoded iso-2022-jp header,
  3. a multipart message with a malformed From header that embeds an
     inner multipart container and a nested message/rfc822 part.

  Each file is opened in a ``with`` block so that it is closed even if
  serialising or writing the message fails.
  """
  msg1 = Message()
  msg1.set_charset('iso-2022-jp')
  msg1['From'] = Header(u'Yusuke Shinyama <*****@*****.**>', 'iso-2022-jp')
  msg1['To'] = Header(u'きょうから明日です <today@tomorrow>', 'iso-2022-jp')
  msg1['Subject'] = Header(u'ムーミン谷のみなさんへ', 'iso-2022-jp')
  msg1['Date'] = 'Thu, 31 Aug 2004 03:06:09 +0900'
  msg1.set_payload(u'その逆だ!'.encode('iso-2022-jp'), 'iso-2022-jp')
  with open(args.pop(0), 'wb') as fp:
    fp.write(msg1.as_string(0))

  msg2 = MIMEMultipart()
  msg2.set_charset('utf-8')
  msg2['From'] = Header(u'えうすけ <*****@*****.**>', 'iso-2022-jp')
  msg2['To'] = Header(u'だれでも <any@one>', 'utf-8')
  msg2['Subject'] = Header(u'何を見てるんだい?', 'iso-2022-jp')
  msg2['Date'] = 'Thu, 29 Feb 2004 19:23:34 +0500'
  text1 = MIMEText(u'ああそうか、\nこれは夢なんだ。'.encode('utf-8'), 'plain', 'utf-8')
  text2 = MIMEText(u'<html><body>\n<strong>HEY!</strong>\n<p>do you wanna feel unconfortably energetic?\n</body></html>', 'html')
  # The attachment filename is passed as an already-encoded header string.
  h = Header(u'ふうばあ ばず', 'iso-2022-jp')
  text2.add_header('Content-Disposition', 'attachment', filename=h.encode())
  msg2.set_payload([text1, text2])
  with open(args.pop(0), 'wb') as fp:
    fp.write(msg2.as_string(0))

  msg3 = MIMEMultipart()
  # Truncated encoded-word followed by raw high bytes.
  msg3['From'] = '=?iso-2022-jp?b?Gy?= \xff\xaa\x88'
  msg3['Subject'] = 'huh?'
  msg3['Date'] = 'Tue, 25 Nov 2008 01:00:09 +0900'
  parts = MIMEMultipart()
  parts.set_payload([MIMEText('part1'), MIMEText('part2')])
  msg4 = Message()
  msg4.set_charset('iso-2022-jp')
  msg4['From'] = Header(u'john doe <*****@*****.**>', 'iso-2022-jp')
  msg4['To'] = Header(u'どこだって? <where@where>', 'iso-2022-jp')
  msg4['Subject'] = Header(u'その先の日本へ', 'iso-2022-jp')
  msg4['Date'] = 'Sun, 31 Aug 2008 12:20:33 +0900'
  msg4.set_payload(u'ししかばう゛ー'.encode('iso-2022-jp'), 'iso-2022-jp')
  msg3.set_payload([parts, MIMEMessage(msg4)])
  with open(args.pop(0), 'wb') as fp:
    fp.write(msg3.as_string(0))
Exemple #15
0
    def _encodedHeader(value, encoding):
        """
        Given a value (or list of values) and an ecoding, return it
        encoded as per rfc2047 for use in a MIME message header.

        >>> from Products.listen.content.mailboxer_list import MailBoxerMailingList

        If the input can be converted to ascii, it will be, regardless
        of the encoding argument:

        >>> MailBoxerMailingList._encodedHeader('blah', 'utf8')
        'blah'

        If it can be encoded to the target encoding, it will be, and
        then encoded as per rfc2047:

        >>> input = u'\xbfhmm?'
        >>> MailBoxerMailingList._encodedHeader(input, 'utf8')
        '=?utf8?b?wr9obW0/?='
        >>> MailBoxerMailingList._encodedHeader(input.encode('utf8'), 'utf8')
        '=?utf8?b?wr9obW0/?='
        >>> raw = 'a string \345\276\267\345\233\275'
        >>> MailBoxerMailingList._encodedHeader(raw, 'utf8')
        '=?utf8?b?YSBzdHJpbmcg5b635Zu9?='

        All other cases will raise an exception. Typically this means
        a raw byte string in an incompatible encoding:

        >>> MailBoxerMailingList._encodedHeader(input.encode('latin1'), 'utf8')
        Traceback (most recent call last):
        ...
        UnicodeDecodeError: 'utf8' codec can't decode byte 0xbf in position 0: unexpected code byte
        """
        try:
            value = value.encode('ascii')
        except (UnicodeEncodeError, UnicodeDecodeError):
            try:
                value = Header(value.encode(encoding), encoding).encode()
            except UnicodeDecodeError:
                try:
                    value = Header(value, encoding).encode()
                except UnicodeDecodeError:
                    logger.error("Could not guess encoding of raw bytestring %r, there is probably a bug in the code that created this header." % value)
                    raise
        return value
    def _encodedHeader(value, encoding):
        """
        Given a value (or list of values) and an ecoding, return it
        encoded as per rfc2047 for use in a MIME message header.

        >>> from Products.listen.content.mailboxer_list import MailBoxerMailingList

        If the input can be converted to ascii, it will be, regardless
        of the encoding argument:

        >>> MailBoxerMailingList._encodedHeader('blah', 'utf8')
        'blah'

        If it can be encoded to the target encoding, it will be, and
        then encoded as per rfc2047:

        >>> input = u'\xbfhmm?'
        >>> MailBoxerMailingList._encodedHeader(input, 'utf8')
        '=?utf8?b?wr9obW0/?='
        >>> MailBoxerMailingList._encodedHeader(input.encode('utf8'), 'utf8')
        '=?utf8?b?wr9obW0/?='
        >>> raw = 'a string \345\276\267\345\233\275'
        >>> MailBoxerMailingList._encodedHeader(raw, 'utf8')
        '=?utf8?b?YSBzdHJpbmcg5b635Zu9?='

        All other cases will raise an exception. Typically this means
        a raw byte string in an incompatible encoding:

        >>> MailBoxerMailingList._encodedHeader(input.encode('latin1'), 'utf8')
        Traceback (most recent call last):
        ...
        UnicodeDecodeError: 'utf8' codec can't decode byte 0xbf in position 0: unexpected code byte
        """
        try:
            value = value.encode('ascii')
        except (UnicodeEncodeError, UnicodeDecodeError):
            try:
                value = Header(value.encode(encoding), encoding).encode()
            except UnicodeDecodeError:
                try:
                    value = Header(value, encoding).encode()
                except UnicodeDecodeError:
                    logger.error("Could not guess encoding of raw bytestring %r, there is probably a bug in the code that created this header." % value)
                    raise
        return value
Exemple #17
0
def forbid_multi_line_headers(name, val, encoding):
    """Forbids multi-line headers, to prevent header injection.

    ``val`` is converted with ``force_unicode`` using ``encoding``
    (default ``"utf-8"``).  Raises ``BadHeaderError`` if it contains a
    newline or carriage return.

    Returns ``(name, value)``: an ASCII string for ASCII-only values
    (a ``Header`` for the Subject); for non-ASCII To/From/Cc headers a
    comma-separated string of individually encoded addresses; otherwise a
    ``Header`` built with ``encoding``.
    """
    encoding = encoding or "utf-8"
    val = force_unicode(val, encoding)
    if "\n" in val or "\r" in val:
        raise BadHeaderError("Header values can't contain newlines (got %r for header %r)" % (val, name))
    try:
        val = val.encode("ascii")
    except UnicodeEncodeError:
        if name.lower() in ("to", "from", "cc"):
            result = []
            for nm, addr in getaddresses((val,)):
                nm = str(Header(nm.encode(encoding), encoding))
                try:
                    addr = addr.encode("ascii")
                except UnicodeEncodeError:  # IDN
                    # A bare str(addr) would raise here for non-ASCII
                    # addresses; encode them the same way as the name.
                    addr = str(Header(addr.encode(encoding), encoding))
                result.append(formataddr((nm, addr)))
            val = ", ".join(result)
        else:
            val = Header(val.encode(encoding), encoding)
    else:
        if name.lower() == "subject":
            val = Header(val)
    return name, val
Exemple #18
0
  def _process_utf8(self,kw):
      """Render the template and collect the message, values and headers.

      ``kw`` is the dict of keyword arguments for the send; the keys read
      here are 'encoding', 'headers', 'mfrom', 'mto', 'mcc', 'mbcc' and
      'subject'.  The whole dict is also forwarded to the second base
      class's ``__call__`` to render the body text.

      Returns a 3-tuple ``(msg, values, headers)``:

      * ``msg`` -- a ``BaseMailTemplate.MIMEText`` holding the rendered body;
      * ``values`` -- dict of the raw (unencoded) mfrom/mto/mcc/mbcc/subject
        values that were found;
      * ``headers`` -- list of ``(name, value)`` pairs sorted by name.

      Raises ``TypeError`` when 'mfrom' or 'mto' is missing or empty.
      """
      # sort out what encoding we're going to use
      encoding = kw.get('encoding',
                        self.getProperty('encoding',
                                         BaseMailTemplate.default_encoding))
      # render the body by calling the second base class directly
      text = self.__class__.__bases__[1].__call__(self,**kw)
      # ZPT adds newline at the end, but it breaks backward compatibility.
      # So I remove it.
      if text and text[-1]=='\n':
        text = text[:-1]
      # only non-HTML unicode bodies are encoded here
      if not self.html() and isinstance(text, unicode):
          text = text.encode(encoding,'replace')
      # now turn the result into a MIMEText object
      msg = BaseMailTemplate.MIMEText(
          text.replace('\r',''),
          self.content_type.split('/')[1],
          encoding
          )
      # sort out what headers and addresses we're going to use
      headers = {}
      values = {}
      # headers from the headers property
      for header in getattr(self,'headers',()):
          name,value = header.split(':',1)
          headers[name]=value
      # headers from the headers parameter
      headers_param = kw.get('headers',{})
      headers.update(headers_param)
      # values and some specific headers
      for key,header in (('mfrom','From'),
                         ('mto','To'),
                         ('mcc','Cc'),
                         ('mbcc','Bcc'),
                         ('subject','Subject')):
          # lookup precedence: explicit keyword, then the 'headers'
          # parameter, then an attribute on self, then the merged headers
          value = kw.get(key,
                         headers_param.get(header,
                                           getattr(self,
                                                   key,
                                                   headers.get(header))))
          if value is not None:
              # keep the raw value; only the header copy is encoded below
              values[key]=value

              # turn some sequences into comma-separated strings
              if isinstance(value, (tuple, list)):
                  value = ', '.join(value)
              # make sure we have no unicode headers
              if isinstance(value,unicode):
                  value = value.encode(encoding)

              if key == 'subject':
                  try:
                      # Try to keep header non encoded
                      # (value is a byte string here, so .encode("ascii")
                      # implicitly decodes it first; non-ASCII bytes raise
                      # UnicodeDecodeError under Python 2)
                      value = Header(value.encode("ascii"))
                  except UnicodeDecodeError:
                      # NOTE(review): the bytes were produced with
                      # `encoding` above but are labelled "UTF-8" here --
                      # confirm callers never use another encoding.
                      value = Header(value, "UTF-8")

              else:
                  # address headers: encode each display name separately
                  value_list = getaddresses([value])
                  dest_list = []
                  for name, email in value_list:
                      try:
                          name = Header(name.encode("ascii"))
                      except UnicodeDecodeError:
                          name = Header(name, "UTF-8")
                      dest_list.append(formataddr((name.encode(), email)))
                  value = ", ".join(dest_list)

              headers[header]=value
      # check required values have been supplied
      errors = []
      for param in ('mfrom','mto'):
          if not values.get(param):
              errors.append(param)
      if errors:
          raise TypeError(
              'The following parameters were required by not specified: '+(
              ', '.join(errors)
              ))
      # add date header
      headers['Date']=BaseMailTemplate.DateTime().rfc822()
      # add message-id header
      headers['Message-ID']=make_msgid()
      # turn headers into an ordered list for predictable header order
      keys = headers.keys()
      keys.sort()
      return msg,values,[(key,headers[key]) for key in keys]
Exemple #19
0
    def _process(self, kw):
        """Render the template and collect the message, values and headers.

        ``kw`` is the dict of keyword arguments for the send; the keys read
        here are 'encoding', 'headers', 'mfrom', 'mto', 'mcc', 'mbcc' and
        'subject'.  The whole dict is also forwarded to the second base
        class's ``__call__`` to render the body text.

        Returns a 3-tuple ``(msg, values, headers)``:

        * ``msg`` -- a ``MIMEText`` holding the rendered body;
        * ``values`` -- dict of the raw mfrom/mto/mcc/mbcc/subject values
          that were found;
        * ``headers`` -- list of ``(name, value)`` pairs sorted by name.

        Raises ``TypeError`` when 'mfrom' or 'mto' is missing or empty.
        """
        # sort out what encoding we're going to use
        encoding = kw.get('encoding',
                          self.getProperty('encoding', default_encoding))
        # render the body by calling the second base class directly
        text = self.__class__.__bases__[1].__call__(self, **kw)
        # ZPT adds newline at the end, but it breaks backward compatibility.
        # So I remove it.
        if text.endswith('\n'):
            text = text[:-1]
        # only non-HTML unicode bodies are encoded here
        if not self.html() and isinstance(text, unicode):
            text = text.encode(encoding, 'replace')
        # now turn the result into a MIMEText object
        msg = MIMEText(text.replace('\r', ''),
                       self.content_type.split('/')[1], encoding)
        # sort out what headers and addresses we're going to use
        headers = {}
        values = {}
        # headers from the headers property
        for header in getattr(self, 'headers', ()):
            name, value = header.split(':', 1)
            headers[name] = value
        # headers from the headers parameter
        headers_param = kw.get('headers', {})
        headers.update(headers_param)
        # values and some specific headers
        for key, header in (('mfrom', 'From'), ('mto', 'To'), ('mcc', 'Cc'),
                            ('mbcc', 'Bcc'), ('subject', 'Subject')):
            # lookup precedence: explicit keyword, then the 'headers'
            # parameter, then an attribute on self, then the merged headers
            value = kw.get(
                key,
                headers_param.get(header,
                                  getattr(self, key, headers.get(header))))
            if value is not None:
                # keep the raw value; only the header copy is encoded below
                values[key] = value

                if key == 'subject':
                    try:
                        # Try to keep header non encoded
                        value = Header(value)
                    except UnicodeDecodeError:
                        # fall back to declaring the value as UTF-8
                        value = Header(value, "UTF-8")

                else:
                    # address headers: a single string is wrapped in a tuple,
                    # any other value is passed to getaddresses() as given
                    dest_list = []
                    for name, email in getaddresses(
                        (value, ) if isinstance(value, basestring) else value):
                        try:
                            name = Header(name)
                        except UnicodeDecodeError:
                            name = Header(name, "UTF-8")
                        dest_list.append(formataddr((name.encode(), email)))
                    value = ", ".join(dest_list)

                headers[header] = value
        # check required values have been supplied
        errors = []
        for param in ('mfrom', 'mto'):
            if not values.get(param):
                errors.append(param)
        if errors:
            raise TypeError(
                'The following parameters were required by not specified: ' +
                (', '.join(errors)))
        # add date header
        headers['Date'] = DateTime().rfc822()
        # do not let the MTA to generate the Message-ID:
        # we want to have it stored in ERP5, for mail threading
        headers['Message-ID'] = make_msgid()
        # turn headers into an ordered list for predictable header order
        keys = headers.keys()
        keys.sort()
        return msg, values, [(key, headers[key]) for key in keys]