Ejemplo n.º 1
0
 def _fold(self, name, value, sanitize):
     """Render one header as ``'name: value'`` text ending in ``self.linesep``.

     name is the header field name.  value is either a str or an object
     that is assumed to be Header-like (it must provide ``encode()``).
     sanitize decides what happens to a str that contains surrogates:
     when true it is wrapped in a Header using the unknown-8bit charset,
     when false it is emitted verbatim and never wrapped.
     """
     parts = ['%s: ' % name]
     if isinstance(value, str):
         if _has_surrogates(value):
             if sanitize:
                 h = header.Header(value,
                                   charset=_charset.UNKNOWN8BIT,
                                   header_name=name)
             else:
                 # If we have raw 8bit data in a byte string, we have no idea
                 # what the encoding is.  There is no safe way to split this
                 # string.  If it's ascii-subset, then we could do a normal
                 # ascii split, but if it's multibyte then we could break the
                 # string.  There's no way to know so the least harm seems to
                 # be to not split the string and risk it being too long.
                 parts.append(value)
                 h = None
         else:
             h = header.Header(value, header_name=name)
     else:
         # Assume it is a Header-like object.
         h = value
     if h is not None:
         # Header.encode() reads maxlinelen=None as "use the Header's own
         # default width", while a max_line_length of None on this object
         # is meant as "do not wrap".  Header.encode() treats 0 as "no
         # limit", so translate None to 0 instead of passing it through.
         maxlinelen = 0
         if self.max_line_length is not None:
             maxlinelen = self.max_line_length
         parts.append(h.encode(linesep=self.linesep, maxlinelen=maxlinelen))
     parts.append(self.linesep)
     return ''.join(parts)
Ejemplo n.º 2
0
 def _fold(self, name, value, sanitize):
     """Build the ``'name: value'`` text for one header, ending in linesep.

     A str value without surrogates is wrapped in a Header and encoded.
     A str with surrogates is wrapped in an unknown-8bit Header when
     sanitize is true, otherwise it is emitted unchanged.  Any non-str
     value is assumed to be Header-like and is encoded directly.
     """
     pieces = ['%s: ' % name]
     # Default case: value is not a str, so treat it as a Header-like object.
     hdr = value
     if isinstance(value, str):
         if not _has_surrogates(value):
             hdr = header.Header(value, header_name=name)
         elif sanitize:
             hdr = header.Header(value,
                                 charset=_charset.UNKNOWN8BIT,
                                 header_name=name)
         else:
             # Raw 8bit data of unknown encoding cannot be split safely
             # (a multibyte sequence might be cut in half), so it is
             # written out unfolded even if the line ends up too long.
             pieces.append(value)
             hdr = None
     if hdr is not None:
         # Header.encode() takes None to mean its own default width, so a
         # max_line_length of None is passed on as 0 instead.
         limit = 0 if self.max_line_length is None else self.max_line_length
         pieces.append(hdr.encode(linesep=self.linesep, maxlinelen=limit))
     pieces.append(self.linesep)
     return ''.join(pieces)
Ejemplo n.º 3
0
 def _sanitize_header(self, name, value):
     """Return value, wrapped in an unknown-8bit Header if it has surrogates.

     Only str values that contain surrogates are wrapped; every other
     value (including non-str objects) is returned unchanged.
     """
     needs_encoding = isinstance(value, str) and _has_surrogates(value)
     if not needs_encoding:
         return value
     return header.Header(value, charset=_charset.UNKNOWN8BIT, header_name=name)
Ejemplo n.º 4
0
 def _fold(self, name, value, refold_binary=False):
     """Return the folded text of one header, ending in ``self.linesep``.

     A value that has a ``name`` attribute folds itself.  Otherwise the
     value's existing line breaks are kept unless refolding is required:
     always when ``refold_source`` is 'all', for over-long lines when it
     is 'long', or when refold_binary is true and the value contains
     surrogates.  Refolding goes through ``self.header_factory``.
     """
     if hasattr(value, 'name'):
         return value.fold(policy=self)
     # A falsy max_line_length disables the length check.
     limit = self.max_line_length or float('inf')
     source_lines = value.splitlines()
     mode = self.refold_source
     if mode == 'all':
         must_refold = True
     elif mode == 'long':
         # The first line shares its row with "name: ", hence the +2.
         first_too_long = (bool(source_lines)
                           and len(source_lines[0]) + len(name) + 2 > limit)
         must_refold = first_too_long or any(
             len(line) > limit for line in source_lines[1:])
     else:
         must_refold = False
     if not must_refold and refold_binary:
         must_refold = _has_surrogates(value)
     if must_refold:
         return self.header_factory(name, ''.join(source_lines)).fold(policy=self)
     return name + ': ' + self.linesep.join(source_lines) + self.linesep
Ejemplo n.º 5
0
 def _handle_text(self, msg):
     """Write the text payload of msg, passing escaped bytes straight through.

     Does nothing when the payload is None.  A payload containing
     surrogates came from bytes originally, so unless the policy's
     cte_type is '7bit' it is written back out as-is.  Everything else
     is delegated to the parent class implementation.
     """
     raw = msg._payload
     if raw is None:
         return
     passthrough = _has_surrogates(raw) and self.policy.cte_type != '7bit'
     if not passthrough:
         super(BytesGenerator, self)._handle_text(msg)
         return
     self.write(raw)
Ejemplo n.º 6
0
 def __new__(cls, name, value):
     """Create the str-based header instance for name from value.

     ``cls.parse`` fills a keyword dict (pre-seeded with an empty
     'defects' list) and must add a 'decoded' entry.  That entry, passed
     through ``utils._sanitize`` if it contains surrogates, becomes the
     string value; the remaining entries are handed to ``self.init``.
     """
     parsed = {"defects": []}
     cls.parse(value, parsed)
     text = parsed.pop("decoded")
     if utils._has_surrogates(text):
         text = utils._sanitize(text)
     instance = str.__new__(cls, text)
     instance.init(name, **parsed)
     return instance
Ejemplo n.º 7
0
 def __new__(cls, name, value):
     """Construct a str subclass instance holding the decoded header value.

     The value is parsed by ``cls.parse`` into a keyword dict.  Its
     'decoded' entry (sanitized when it carries surrogates) is used as
     the string content and removed from the dict; everything left is
     forwarded to ``self.init`` together with name.
     """
     kwds = dict(defects=[])
     cls.parse(value, kwds)
     decoded = kwds['decoded']
     del kwds['decoded']
     # Surrogates mark undecodable bytes; _sanitize replaces them.
     content = utils._sanitize(decoded) if utils._has_surrogates(decoded) else decoded
     self = str.__new__(cls, content)
     self.init(name, **kwds)
     return self
Ejemplo n.º 8
0
 def _sanitize_header(self, name, value):
     """Return a header value that is safe to encode.

     A str containing surrogates is returned as a Header that uses the
     unknown-8bit charset, so the escaped bytes end up in encoded words.
     Anything else is returned untouched.
     """
     # Non-str values are assumed to be header objects already.
     if isinstance(value, str) and _has_surrogates(value):
         return header.Header(value, charset=_charset.UNKNOWN8BIT,
                              header_name=name)
     return value
Ejemplo n.º 9
0
 def _fold(self, name, value, refold_binary=False):
     """Fold one header into its wire text, terminated by ``self.linesep``.

     Values exposing a ``name`` attribute are asked to fold themselves.
     Plain values keep their own line breaks unless ``refold_source``
     ('all', or 'long' with an over-long line) or refold_binary (with
     surrogates present) requires refolding via ``self.header_factory``.
     """
     if hasattr(value, 'name'):
         return value.fold(policy=self)
     # A falsy max_line_length means no line is ever considered too long.
     limit = self.max_line_length if self.max_line_length else float('inf')
     pieces = value.splitlines()

     def _has_long_line():
         # The first line is preceded by "name: ", hence the extra 2.
         if pieces and len(pieces[0]) + len(name) + 2 > limit:
             return True
         return any(len(rest) > limit for rest in pieces[1:])

     policy_refold = (self.refold_source == 'all'
                      or (self.refold_source == 'long' and _has_long_line()))
     if policy_refold or (refold_binary and _has_surrogates(value)):
         return self.header_factory(name, ''.join(pieces)).fold(policy=self)
     return name + ': ' + self.linesep.join(pieces) + self.linesep
Ejemplo n.º 10
0
 def _handle_text(self, msg):
     """Write msg's text payload, emitting surrogate-escaped data directly.

     Nothing is written for a None payload.  When the payload contains
     surrogates and the policy's cte_type is not '7bit', the payload is
     optionally From-mangled (stored back on msg) and written line by
     line.  Otherwise the parent class implementation handles it.
     """
     if msg._payload is None:
         return
     binary_passthrough = (_has_surrogates(msg._payload)
                           and self.policy.cte_type != '7bit')
     if not binary_passthrough:
         super(BytesGenerator, self)._handle_text(msg)
         return
     if self._mangle_from_:
         msg._payload = fcre.sub('>From ', msg._payload)
     self._write_lines(msg._payload)
Ejemplo n.º 11
0
 def _handle_text(self, msg):
     """Emit the text payload, writing surrogate-escaped data back verbatim.

     A None payload produces no output.  Payloads without surrogates, or
     any payload under a '7bit' cte_type policy, go to the parent class.
     """
     payload = msg._payload
     if payload is None:
         return
     if _has_surrogates(payload):
         # Surrogates mean the original source was bytes, so it can be
         # written back out unchanged unless 7bit output is required.
         if self.policy.cte_type != '7bit':
             self.write(payload)
             return
     super(BytesGenerator, self)._handle_text(msg)
Ejemplo n.º 12
0
 def _handle_text(self, msg):
     """Write the text payload; surrogate-escaped data bypasses re-encoding.

     Returns immediately for a None payload.  If the payload has no
     surrogates, or the policy's cte_type is '7bit', the parent class
     does the work.  Otherwise the payload is From-mangled when
     ``_mangle_from_`` is set (the result is stored back on msg) and
     written with ``_write_lines``.
     """
     raw = msg._payload
     if raw is None:
         return
     if not _has_surrogates(raw) or self.policy.cte_type == '7bit':
         super(BytesGenerator, self)._handle_text(msg)
         return
     # Surrogates mean the original source was bytes: write it back out.
     if self._mangle_from_:
         raw = msg._payload = fcre.sub(">From ", raw)
     self._write_lines(raw)
Ejemplo n.º 13
0
 def _sanitize_header(self, name, value):
     """Wrap a surrogate-bearing str in an unknown-8bit Header.

     Values that are not str are assumed to be header objects already
     and are returned as they are, as are str values without surrogates.
     """
     if not isinstance(value, str):
         return value
     # Surrogates stand for undecodable bytes; the unknown-8bit charset
     # lets them be emitted as encoded words.
     return (header.Header(value, charset=_charset.UNKNOWN8BIT,
                           header_name=name)
             if _has_surrogates(value) else value)
Ejemplo n.º 14
0
 def _handle_text(self, msg):
     """Output the text payload, short-circuiting surrogate-escaped data.

     No output for a None payload.  A payload with surrogates is written
     through ``_write_lines`` (after optional From-mangling, which is
     also stored back on msg) unless the policy's cte_type is '7bit'.
     All other payloads are handled by the parent class.
     """
     if msg._payload is None:
         return
     if _has_surrogates(msg._payload):
         # The original source was bytes, so just write it back out,
         # provided 8bit output is allowed.
         if self.policy.cte_type != '7bit':
             if self._mangle_from_:
                 msg._payload = fcre.sub(">From ", msg._payload)
             self._write_lines(msg._payload)
             return
     super(BytesGenerator, self)._handle_text(msg)
Ejemplo n.º 15
0
 def get_payload(self, i=None, decode=False):
     """Return the payload, optionally decoded to bytes.

     For a multipart message the payload is a list: it is returned whole
     when i is None, or element i otherwise.  With decode true a
     multipart message yields None.

     For a non-multipart message, passing i raises TypeError unless the
     payload happens to be a list.  With decode false the payload is
     returned as stored, except that a str containing surrogates is
     re-decoded using the message's charset parameter (falling back to
     ASCII for unknown charsets) with undecodable bytes replaced.  With
     decode true the payload is converted to bytes and decoded according
     to the Content-Transfer-Encoding header (quoted-printable, base64 or
     a uuencode variant); for any other encoding the bytes are returned
     undecoded.  Base64 defects are reported to ``self.policy``.
     """
     if self.is_multipart():
         if decode:
             return None
         if i is None:
             return self._payload
         return self._payload[i]
     if i is not None and not isinstance(self._payload, list):
         raise TypeError('Expected list, got %s' % type(self._payload))
     payload = self._payload
     # The header value may not be a plain str, so stringify it first.
     cte = str(self.get('content-transfer-encoding', '')).lower()
     if isinstance(payload, str):
         if utils._has_surrogates(payload):
             # Surrogates are escaped bytes: recover the original bytes.
             bpayload = payload.encode('ascii', 'surrogateescape')
             if not decode:
                 try:
                     payload = bpayload.decode(
                         self.get_param('charset', 'ascii'), 'replace')
                 except LookupError:
                     # Unknown charset name: fall back to ASCII.
                     payload = bpayload.decode('ascii', 'replace')
         elif decode:
             try:
                 bpayload = payload.encode('ascii')
             except UnicodeError:
                 # Non-ASCII text without surrogates: convert to bytes in
                 # a way that cannot fail.
                 bpayload = payload.encode('raw-unicode-escape')
     if not decode:
         return payload
     if cte == 'quoted-printable':
         return utils._qdecode(bpayload)
     if cte == 'base64':
         (value, defects) = decode_b(b''.join(bpayload.splitlines()))
         for defect in defects:
             self.policy.handle_defect(self, defect)
         return value
     if cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
         in_file = BytesIO(bpayload)
         out_file = BytesIO()
         try:
             uu.decode(in_file, out_file, quiet=True)
             return out_file.getvalue()
         except uu.Error:
             # Undecodable uuencoded data: return the bytes as they are.
             return bpayload
     if isinstance(payload, str):
         return bpayload
     return payload
Ejemplo n.º 16
0
 def get_payload(self, i=None, decode=False):
     """Return the message payload, decoded to bytes when decode is true.

     Multipart messages: the payload list (i is None) or its element i;
     None when decode is true.

     Other messages: TypeError if i is given and the payload is not a
     list.  Without decode the stored payload is returned, with a
     surrogate-escaped str first re-decoded via the charset parameter
     (ASCII when the charset is unknown), replacing undecodable bytes.
     With decode the payload is turned into bytes and decoded per the
     Content-Transfer-Encoding header when that is quoted-printable,
     base64 or a uuencode variant; otherwise the bytes are returned
     as-is.  Defects found while decoding base64 go to ``self.policy``.
     """
     if self.is_multipart():
         if decode:
             return None
         if i is None:
             return self._payload
         return self._payload[i]
     if i is not None and not isinstance(self._payload, list):
         raise TypeError('Expected list, got %s' % type(self._payload))
     payload = self._payload
     # Stringify the header value before comparing it.
     cte = str(self.get('content-transfer-encoding', '')).lower()
     if isinstance(payload, str):
         if utils._has_surrogates(payload):
             # Undo the surrogate escaping to get the original bytes.
             bpayload = payload.encode('ascii', 'surrogateescape')
             if not decode:
                 try:
                     payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
                 except LookupError:
                     # The declared charset is not a known codec.
                     payload = bpayload.decode('ascii', 'replace')
         elif decode:
             try:
                 bpayload = payload.encode('ascii')
             except UnicodeError:
                 # Fallback encoding that cannot fail for non-ASCII text.
                 bpayload = payload.encode('raw-unicode-escape')
     if not decode:
         return payload
     if cte == 'quoted-printable':
         return utils._qdecode(bpayload)
     if cte == 'base64':
         (value, defects) = decode_b(b''.join(bpayload.splitlines()))
         for defect in defects:
             self.policy.handle_defect(self, defect)
         return value
     if cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
         in_file = BytesIO(bpayload)
         out_file = BytesIO()
         try:
             uu.decode(in_file, out_file, quiet=True)
             return out_file.getvalue()
         except uu.Error:
             # Decoding failed: hand back the undecoded bytes.
             return bpayload
     if isinstance(payload, str):
         return bpayload
     return payload
Ejemplo n.º 17
0
 def _handle_text(self, msg):
     """Write the text payload of msg via ``self._write_lines``.

     Returns without output when the payload is None and raises
     TypeError when it is not a str.  If the raw payload contains
     surrogates and msg declares a charset, the Content-Transfer-Encoding
     header is dropped and the payload is set again with that charset
     (this modifies msg), then re-read.  Lines starting with "From " are
     escaped when ``_mangle_from_`` is set.
     """
     text = msg.get_payload()
     if text is None:
         return
     if not isinstance(text, str):
         raise TypeError("string payload expected: %s" % type(text))
     # Only look up the charset when the stored payload has escaped bytes.
     declared = msg.get_param("charset") if _has_surrogates(msg._payload) else None
     if declared is not None:
         del msg["content-transfer-encoding"]
         msg.set_payload(text, declared)
         text = msg.get_payload()
     self._write_lines(fcre.sub(">From ", text) if self._mangle_from_ else text)
Ejemplo n.º 18
0
 def _handle_text(self, msg):
     """Write the text payload of msg with ``self.write``.

     A None payload writes nothing; a non-str payload raises TypeError.
     When the raw payload has surrogates and a charset parameter is
     present, msg is modified: its Content-Transfer-Encoding header is
     deleted and the payload is set again with that charset before being
     re-read.  "From " at the start of a line is escaped if
     ``_mangle_from_`` is true.
     """
     body = msg.get_payload()
     if body is None:
         return
     if isinstance(body, str):
         has_escaped_bytes = _has_surrogates(msg._payload)
     else:
         raise TypeError('string payload expected: %s' % type(body))
     if has_escaped_bytes:
         charset = msg.get_param('charset')
         if charset is not None:
             del msg['content-transfer-encoding']
             msg.set_payload(body, charset)
             body = msg.get_payload()
     if self._mangle_from_:
         body = fcre.sub('>From ', body)
     self.write(body)
Ejemplo n.º 19
0
 def _handle_text(self, msg):
     """Emit msg's text payload through ``self.write``.

     Nothing happens for a None payload, and a payload that is not a str
     raises TypeError.  A raw payload with surrogates plus a declared
     charset causes msg to be rewritten in place (transfer-encoding
     header removed, payload set again with the charset) and the payload
     to be fetched again.  From-mangling is applied when enabled.
     """
     content = msg.get_payload()
     if content is None:
         return
     if not isinstance(content, str):
         raise TypeError('string payload expected: %s' % type(content))
     charset = None
     if _has_surrogates(msg._payload):
         charset = msg.get_param('charset')
     if charset is not None:
         del msg['content-transfer-encoding']
         msg.set_payload(content, charset)
         content = msg.get_payload()
     mangled = fcre.sub('>From ', content) if self._mangle_from_ else content
     self.write(mangled)
Ejemplo n.º 20
0
 def _fold(self, name, value, sanitize):
     """Render one header as ``'name: value'`` text ending in ``self.linesep``.

     value is either a str or an object assumed to be Header-like (it
     must provide ``encode()``).  A str containing surrogates is wrapped
     in an unknown-8bit Header when sanitize is true; when sanitize is
     false it is emitted exactly once, verbatim and unfolded.
     """
     parts = ['%s: ' % name]
     if isinstance(value, str):
         if _has_surrogates(value):
             if sanitize:
                 h = header.Header(value, charset=_charset.UNKNOWN8BIT, header_name=name)
             else:
                 # Raw 8bit data of unknown encoding cannot be split
                 # safely, so it is appended as-is.  h must stay None
                 # here, otherwise the value would be encoded and emitted
                 # a second time below.
                 parts.append(value)
                 h = None
         else:
             h = header.Header(value, header_name=name)
     else:
         # Assume it is a Header-like object.
         h = value
     if h is not None:
         # Header.encode() reads maxlinelen=None as "use the default
         # width"; a max_line_length of None means "do not wrap", which
         # Header.encode() expresses as 0.
         maxlinelen = 0
         if self.max_line_length is not None:
             maxlinelen = self.max_line_length
         parts.append(h.encode(linesep=self.linesep, maxlinelen=maxlinelen))
     parts.append(self.linesep)
     return ''.join(parts)
Ejemplo n.º 21
0
 def _handle_text(self, msg):
     """Write the text payload of msg via ``self._write_lines``.

     Returns without output for a None payload and raises TypeError for a
     non-str payload.  If the raw payload contains surrogates and msg
     declares a charset, a deep copy of msg is re-encoded (transfer-
     encoding header removed, payload set again with the charset) so the
     caller's message is left alone; the copy's resulting
     Content-Transfer-Encoding and Content-Type values are stored in
     ``self._munge_cte``.  From-mangling is applied when enabled.
     """
     text = msg.get_payload()
     if text is None:
         return
     if not isinstance(text, str):
         raise TypeError('string payload expected: %s' % type(text))
     declared = msg.get_param('charset') if _has_surrogates(msg._payload) else None
     if declared is not None:
         # XXX: working on a copy is an ugly hack to avoid modifying the
         # existing message.
         clone = deepcopy(msg)
         del clone['content-transfer-encoding']
         clone.set_payload(text, declared)
         text = clone.get_payload()
         self._munge_cte = (clone['content-transfer-encoding'],
                            clone['content-type'])
     self._write_lines(fcre.sub('>From ', text) if self._mangle_from_ else text)
Ejemplo n.º 22
0
 def _handle_text(self, msg):
     """Emit msg's text payload line by line through ``self._write_lines``.

     A None payload writes nothing and a non-str payload raises
     TypeError.  For a raw payload with surrogates and a declared
     charset, the re-encoding is done on a deep copy so that the passed
     message is not modified; the copy's Content-Transfer-Encoding and
     Content-Type are recorded in ``self._munge_cte``.  Lines starting
     with "From " are escaped when ``_mangle_from_`` is set.
     """
     body = msg.get_payload()
     if body is None:
         return
     if isinstance(body, str):
         has_escaped_bytes = _has_surrogates(msg._payload)
     else:
         raise TypeError('string payload expected: %s' % type(body))
     if has_escaped_bytes:
         charset = msg.get_param('charset')
         if charset is not None:
             # XXX: This copy stuff is an ugly hack to avoid modifying the
             # existing message.
             msg = deepcopy(msg)
             del msg['content-transfer-encoding']
             msg.set_payload(body, charset)
             body = msg.get_payload()
             new_cte = msg['content-transfer-encoding']
             new_ctype = msg['content-type']
             self._munge_cte = (new_cte, new_ctype)
     if self._mangle_from_:
         body = fcre.sub('>From ', body)
     self._write_lines(body)
Ejemplo n.º 23
0
 def _fold(self, name, value, sanitize):
     """Produce the ``'name: value'`` text of one header plus ``linesep``.

     str values are wrapped in a Header and encoded, except that a str
     with surrogates is written verbatim when sanitize is false (and
     wrapped in an unknown-8bit Header when it is true).  Non-str values
     are assumed to be Header-like and encoded directly.
     """
     def _as_header():
         # Returns the object to encode, or None when the raw value has
         # already been appended to the output unchanged.
         if not isinstance(value, str):
             return value
         if not _has_surrogates(value):
             return header.Header(value, header_name=name)
         if sanitize:
             return header.Header(value, charset=_charset.UNKNOWN8BIT,
                                  header_name=name)
         chunks.append(value)
         return None

     chunks = ['%s: ' % name]
     hdr = _as_header()
     if hdr is not None:
         # None is not passed on: Header.encode() would treat it as a
         # request for its default width, so 0 is used instead.
         width = self.max_line_length
         if width is None:
             width = 0
         chunks.append(hdr.encode(linesep=self.linesep, maxlinelen=width))
     chunks.append(self.linesep)
     return ''.join(chunks)
Ejemplo n.º 24
0
    def get_payload(self, i=None, decode=False):
        """Return the payload, optionally decoded to bytes.

        The payload is either a list (multipart) or a string.  The list
        is returned by reference, so mutating it changes the message.
        Optional i selects one element of a multipart payload.

        Optional decode (default False) asks for the payload to be
        decoded according to the Content-Transfer-Encoding header.  For a
        non-multipart message the payload is decoded when the header says
        `quoted-printable', `base64' or one of the uuencode names; for
        any other or missing encoding, or for undecodable uuencoded data,
        the payload bytes are returned undecoded.  For a multipart
        message with decode true, None is returned.

        Summary of results:

          i     decode  is_multipart  result
        ------  ------  ------------  ------------------------------
         None   True    True          None
          i     True    True          None
         None   False   True          _payload (a list)
          i     False   True          _payload element i
          i     any     False         TypeError (not a list)
         None   False   False         _payload
         None   True    False         _payload decoded (bytes)
        """
        if self.is_multipart():
            if decode:
                return None
            return self._payload if i is None else self._payload[i]
        # For backward compatibility this checks the payload type (with
        # this exact message) rather than using is_multipart().
        if i is not None and not isinstance(self._payload, list):
            raise TypeError('Expected list, got %s' % type(self._payload))
        raw = self._payload
        # The header value might be a Header object, so stringify it.
        cte = str(self.get('content-transfer-encoding', '')).lower()
        # raw may be something other than str here (e.g. bytes).
        is_text = isinstance(raw, str)
        has_8bit = is_text and utils._has_surrogates(raw)

        if not decode:
            if not has_8bit:
                return raw
            # Surrogates are escaped bytes: recover them and decode with
            # the declared charset, replacing what cannot be decoded.
            octets = raw.encode('ascii', 'surrogateescape')
            try:
                return octets.decode(self.get_param('charset', 'ascii'), 'replace')
            except LookupError:
                return octets.decode('ascii', 'replace')

        if has_8bit:
            octets = raw.encode('ascii', 'surrogateescape')
        elif is_text:
            try:
                octets = raw.encode('ascii')
            except UnicodeError:
                # Not expected for RFC compliant messages, whose text is
                # pure ASCII.  Use an encoding that cannot fail.
                octets = raw.encode('raw-unicode-escape')

        if cte == 'quoted-printable':
            return utils._qdecode(octets)
        if cte == 'base64':
            # XXX: a bit of a hack; decode_b should probably be factored
            # out somewhere.
            value, defects = decode_b(b''.join(octets.splitlines()))
            for defect in defects:
                self.policy.handle_defect(self, defect)
            return value
        if cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            source = BytesIO(octets)
            sink = BytesIO()
            try:
                uu.decode(source, sink, quiet=True)
                return sink.getvalue()
            except uu.Error:
                # Some decoding problem: return the bytes undecoded.
                return octets
        if is_text:
            return octets
        return raw
 def test_as_string_unicode_surrogates(self):
     """as_string() of the parsed bad_email_4.eml must contain no surrogates."""
     with path('mailman.email.tests.data', 'bad_email_4.eml') as email_path, \
             open(str(email_path), 'rb') as fp:
         msg = message_from_binary_file(fp, Message)
     rendered = msg.as_string()
     self.assertFalse(_has_surrogates(rendered))
Ejemplo n.º 26
0
    def get_payload(self, i=None, decode=False):
        """Return the message payload, optionally decoded.

        A multipart message has a list payload; it is returned by
        reference (so mutating it modifies the message), or only its
        element i when i is given.  With decode true a multipart message
        returns None.

        A non-multipart message raises TypeError if i is given and the
        payload is not a list.  Otherwise, with decode false (the
        default) the payload is returned; with decode true it is decoded
        according to the Content-Transfer-Encoding header when that is
        `quoted-printable', `base64' or a uuencode name.  With any other
        or missing encoding, or with undecodable uuencoded data, the
        payload bytes are returned without decoding.
        """
        if self.is_multipart():
            if decode:
                return None
            return self._payload if i is None else self._payload[i]
        if not (i is None or isinstance(self._payload, list)):
            raise TypeError('Expected list, got %s' % type(self._payload))
        body = self._payload
        encoding = str(self.get('content-transfer-encoding', '')).lower()
        textual = isinstance(body, str)
        if textual and utils._has_surrogates(body):
            # Surrogates are escaped bytes; recover the original bytes.
            raw = body.encode('ascii', 'surrogateescape')
            if not decode:
                try:
                    body = raw.decode(
                        self.get_param('charset', 'ascii'), 'replace')
                except LookupError:
                    # Unknown charset name: decode as ASCII instead.
                    body = raw.decode('ascii', 'replace')
        elif textual and decode:
            try:
                raw = body.encode('ascii')
            except UnicodeError:
                # Non-ASCII text: use an encoding that cannot fail.
                raw = body.encode('raw-unicode-escape')
        if not decode:
            return body
        if encoding == 'quoted-printable':
            return quopri.decodestring(raw)
        if encoding == 'base64':
            value, defects = decode_b(b''.join(raw.splitlines()))
            for defect in defects:
                self.policy.handle_defect(self, defect)
            return value
        if encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            source = BytesIO(raw)
            sink = BytesIO()
            try:
                uu.decode(source, sink, quiet=True)
                return sink.getvalue()
            except uu.Error:
                return raw
        return raw if textual else body
Ejemplo n.º 27
0
    def get_payload(self, i=None, decode=False):
        """Return the payload of this message.

        The result is a list for a multipart message and a string
        otherwise.  The list is the message's own, so changes to it
        change the message.  Optional i picks one element of the list.

        Optional decode (default False) requests decoding as described by
        the Content-Transfer-Encoding header.  This only applies to
        non-multipart messages, and only the `quoted-printable', `base64'
        and uuencode encodings are decoded.  If the header is missing or
        names something else, or uuencoded data is bogus, the payload
        bytes come back undecoded.  A multipart message with decode true
        gives None.

        Logic table:

          i     decode  is_multipart  result
        ------  ------  ------------  ------------------------------
         None   True    True          None
          i     True    True          None
         None   False   True          _payload (a list)
          i     False   True          _payload element i
          i     False   False         error (not a list)
          i     True    False         error (not a list)
         None   False   False         _payload
         None   True    False         _payload decoded (bytes)
        """
        container = self._payload if self.is_multipart() else None
        if container is not None:
            if decode:
                return None
            if i is None:
                return container
            return container[i]
        # Kept for backward compatibility: test the payload type and use
        # this error message instead of relying on is_multipart().
        if i is not None and not isinstance(self._payload, list):
            raise TypeError('Expected list, got %s' % type(self._payload))
        content = self._payload
        # Stringify: the header value might be a Header object.
        transfer_encoding = str(self.get('content-transfer-encoding', '')).lower()
        # content may be bytes here, in which case no conversion is done.
        if isinstance(content, str):
            escaped = utils._has_surrogates(content)
            if escaped:
                data = content.encode('ascii', 'surrogateescape')
            if escaped and not decode:
                try:
                    charset = self.get_param('charset', 'ascii')
                    content = data.decode(charset, 'replace')
                except LookupError:
                    content = data.decode('ascii', 'replace')
            elif decode and not escaped:
                try:
                    data = content.encode('ascii')
                except UnicodeError:
                    # Should not happen for RFC compliant messages, which
                    # contain only ASCII; fall back to an encoding that
                    # is guaranteed not to fail.
                    data = content.encode('raw-unicode-escape')
        if not decode:
            return content
        if transfer_encoding == 'quoted-printable':
            return utils._qdecode(data)
        if transfer_encoding == 'base64':
            # XXX: a bit of a hack; decode_b should probably live
            # somewhere shared.
            joined = b''.join(data.splitlines())
            value, defects = decode_b(joined)
            for defect in defects:
                self.policy.handle_defect(self, defect)
            return value
        if transfer_encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            encoded_stream = BytesIO(data)
            decoded_stream = BytesIO()
            try:
                uu.decode(encoded_stream, decoded_stream, quiet=True)
                return decoded_stream.getvalue()
            except uu.Error:
                # Some decoding problem
                return data
        if isinstance(content, str):
            return data
        return content
Ejemplo n.º 28
0
 def _sanitize_header(self, name, value):
     """Return value, replaced by an unknown-8bit Header if it needs one.

     The replacement only happens for str values containing surrogates;
     all other values are passed through unchanged.
     """
     sanitized = value
     if isinstance(value, str):
         if _has_surrogates(value):
             sanitized = header.Header(
                 value, charset=_charset.UNKNOWN8BIT, header_name=name)
     return sanitized