def _fold(self, name, value, sanitize): parts = [] parts.append('%s: ' % name) if isinstance(value, str): if _has_surrogates(value): if sanitize: h = header.Header(value, charset=_charset.UNKNOWN8BIT, header_name=name) else: # If we have raw 8bit data in a byte string, we have no idea # what the encoding is. There is no safe way to split this # string. If it's ascii-subset, then we could do a normal # ascii split, but if it's multibyte then we could break the # string. There's no way to know so the least harm seems to # be to not split the string and risk it being too long. parts.append(value) h = None else: h = header.Header(value, header_name=name) else: # Assume it is a Header-like object. h = value if h is not None: parts.append(h.encode(linesep=self.linesep, maxlinelen=self.max_line_length)) parts.append(self.linesep) return ''.join(parts)
def _fold(self, name, value, sanitize): parts = [] parts.append('%s: ' % name) if isinstance(value, str): if _has_surrogates(value): if sanitize: h = header.Header(value, charset=_charset.UNKNOWN8BIT, header_name=name) else: # If we have raw 8bit data in a byte string, we have no idea # what the encoding is. There is no safe way to split this # string. If it's ascii-subset, then we could do a normal # ascii split, but if it's multibyte then we could break the # string. There's no way to know so the least harm seems to # be to not split the string and risk it being too long. parts.append(value) h = None else: h = header.Header(value, header_name=name) else: # Assume it is a Header-like object. h = value if h is not None: # The Header class interprets a value of None for maxlinelen as the # default value of 78, as recommended by RFC 2822. maxlinelen = 0 if self.max_line_length is not None: maxlinelen = self.max_line_length parts.append(h.encode(linesep=self.linesep, maxlinelen=maxlinelen)) parts.append(self.linesep) return ''.join(parts)
def _sanitize_header(self, name, value): if not isinstance(value, str): return value elif _has_surrogates(value): return header.Header(value, charset=(_charset.UNKNOWN8BIT), header_name=name) else: return value
def _fold(self, name, value, refold_binary=False): if hasattr(value, 'name'): return value.fold(policy=self) maxlen = self.max_line_length if self.max_line_length else float('inf') lines = value.splitlines() refold = self.refold_source == 'all' or self.refold_source == 'long' and (lines and len(lines[0]) + len(name) + 2 > maxlen or any(len(x) > maxlen for x in lines[1:])) if refold or refold_binary and _has_surrogates(value): return self.header_factory(name, ''.join(lines)).fold(policy=self) return name + ': ' + self.linesep.join(lines) + self.linesep
def _handle_text(self, msg): # If the string has surrogates the original source was bytes, so # just write it back out. if msg._payload is None: return if _has_surrogates(msg._payload) and not self.policy.cte_type=='7bit': self.write(msg._payload) else: super(BytesGenerator,self)._handle_text(msg)
def __new__(cls, name, value): kwds = {"defects": []} cls.parse(value, kwds) if utils._has_surrogates(kwds["decoded"]): kwds["decoded"] = utils._sanitize(kwds["decoded"]) self = str.__new__(cls, kwds["decoded"]) del kwds["decoded"] self.init(name, **kwds) return self
def __new__(cls, name, value): kwds = {'defects': []} cls.parse(value, kwds) if utils._has_surrogates(kwds['decoded']): kwds['decoded'] = utils._sanitize(kwds['decoded']) self = str.__new__(cls, kwds['decoded']) del kwds['decoded'] self.init(name, **kwds) return self
def _sanitize_header(self, name, value): # If the header value contains surrogates, return a Header using # the unknown-8bit charset to encode the bytes as encoded words. if not isinstance(value, str): # Assume it is already a header object return value if _has_surrogates(value): return header.Header(value, charset=_charset.UNKNOWN8BIT, header_name=name) else: return value
def _fold(self, name, value, refold_binary=False): if hasattr(value, 'name'): return value.fold(policy=self) maxlen = self.max_line_length if self.max_line_length else float('inf') lines = value.splitlines() refold = self.refold_source == 'all' or self.refold_source == 'long' and (lines and len(lines[0]) + len(name) + 2 > maxlen or any(len(x) > maxlen for x in lines[1:])) if refold or refold_binary and _has_surrogates(value): return self.header_factory(name, ''.join(lines)).fold(policy=self) else: return name + ': ' + self.linesep.join(lines) + self.linesep
def _handle_text(self, msg): if msg._payload is None: return if _has_surrogates( msg._payload) and not self.policy.cte_type == '7bit': if self._mangle_from_: msg._payload = fcre.sub('>From ', msg._payload) self._write_lines(msg._payload) else: super(BytesGenerator, self)._handle_text(msg)
def _handle_text(self, msg): # If the string has surrogates the original source was bytes, so # just write it back out. if msg._payload is None: return if _has_surrogates( msg._payload) and not self.policy.cte_type == '7bit': self.write(msg._payload) else: super(BytesGenerator, self)._handle_text(msg)
def _handle_text(self, msg): # If the string has surrogates the original source was bytes, so # just write it back out. if msg._payload is None: return if _has_surrogates(msg._payload) and not self.policy.cte_type=='7bit': if self._mangle_from_: msg._payload = fcre.sub(">From ", msg._payload) self._write_lines(msg._payload) else: super(BytesGenerator,self)._handle_text(msg)
def get_payload(self, i=None, decode=False): if self.is_multipart(): if decode: return if i is None: return self._payload return self._payload[i] if i is not None and not isinstance(self._payload, list): raise TypeError('Expected list, got %s' % type(self._payload)) payload = self._payload cte = str(self.get('content-transfer-encoding', '')).lower() if isinstance(payload, str): if utils._has_surrogates(payload): bpayload = payload.encode('ascii', 'surrogateescape') if not decode: try: payload = bpayload.decode( self.get_param('charset', 'ascii'), 'replace') except LookupError: payload = bpayload.decode('ascii', 'replace') if decode: try: bpayload = payload.encode('ascii') except UnicodeError: bpayload = payload.encode('raw-unicode-escape') elif decode: try: bpayload = payload.encode('ascii') except UnicodeError: bpayload = payload.encode('raw-unicode-escape') if not decode: return payload if cte == 'quoted-printable': return utils._qdecode(bpayload) if cte == 'base64': (value, defects) = decode_b(b''.join(bpayload.splitlines())) for defect in defects: self.policy.handle_defect(self, defect) return value if cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): in_file = BytesIO(bpayload) out_file = BytesIO() try: uu.decode(in_file, out_file, quiet=True) return out_file.getvalue() except uu.Error: return bpayload if isinstance(payload, str): return bpayload return payload
def get_payload(self, i=None, decode=False): if self.is_multipart(): if decode: return if i is None: return self._payload return self._payload[i] if i is not None and not isinstance(self._payload, list): raise TypeError('Expected list, got %s' % type(self._payload)) payload = self._payload cte = str(self.get('content-transfer-encoding', '')).lower() if isinstance(payload, str): if utils._has_surrogates(payload): bpayload = payload.encode('ascii', 'surrogateescape') if not decode: try: payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace') except LookupError: payload = bpayload.decode('ascii', 'replace') if decode: try: bpayload = payload.encode('ascii') except UnicodeError: bpayload = payload.encode('raw-unicode-escape') elif decode: try: bpayload = payload.encode('ascii') except UnicodeError: bpayload = payload.encode('raw-unicode-escape') if not decode: return payload if cte == 'quoted-printable': return utils._qdecode(bpayload) if cte == 'base64': (value, defects) = decode_b(b''.join(bpayload.splitlines())) for defect in defects: self.policy.handle_defect(self, defect) return value if cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): in_file = BytesIO(bpayload) out_file = BytesIO() try: uu.decode(in_file, out_file, quiet=True) return out_file.getvalue() except uu.Error: return bpayload if isinstance(payload, str): return bpayload return payload
def _handle_text(self, msg): payload = msg.get_payload() if payload is None: return if not isinstance(payload, str): raise TypeError("string payload expected: %s" % type(payload)) if _has_surrogates(msg._payload): charset = msg.get_param("charset") if charset is not None: del msg["content-transfer-encoding"] msg.set_payload(payload, charset) payload = msg.get_payload() if self._mangle_from_: payload = fcre.sub(">From ", payload) self._write_lines(payload)
def _handle_text(self, msg): payload = msg.get_payload() if payload is None: return if not isinstance(payload, str): raise TypeError('string payload expected: %s' % type(payload)) if _has_surrogates(msg._payload): charset = msg.get_param('charset') if charset is not None: del msg['content-transfer-encoding'] msg.set_payload(payload, charset) payload = msg.get_payload() if self._mangle_from_: payload = fcre.sub('>From ', payload) self.write(payload)
def _fold(self, name, value, sanitize): parts = [] parts.append('%s: ' % name) if isinstance(value, str): if _has_surrogates(value): if sanitize: h = header.Header(value, charset=_charset.UNKNOWN8BIT, header_name=name) else: parts.append(value) h = None h = header.Header(value, header_name=name) else: h = header.Header(value, header_name=name) else: h = value if h is not None: parts.append(h.encode(linesep=self.linesep, maxlinelen=self.max_line_length)) parts.append(self.linesep) return ''.join(parts)
def _handle_text(self, msg): payload = msg.get_payload() if payload is None: return if not isinstance(payload, str): raise TypeError('string payload expected: %s' % type(payload)) if _has_surrogates(msg._payload): charset = msg.get_param('charset') if charset is not None: # XXX: This copy stuff is an ugly hack to avoid modifying the # existing message. msg = deepcopy(msg) del msg['content-transfer-encoding'] msg.set_payload(payload, charset) payload = msg.get_payload() self._munge_cte = (msg['content-transfer-encoding'], msg['content-type']) if self._mangle_from_: payload = fcre.sub('>From ', payload) self._write_lines(payload)
def _fold(self, name, value, sanitize): parts = [] parts.append('%s: ' % name) if isinstance(value, str): if _has_surrogates(value): if sanitize: h = header.Header(value, charset=(_charset.UNKNOWN8BIT), header_name=name) else: parts.append(value) h = None else: h = header.Header(value, header_name=name) else: h = value if h is not None: maxlinelen = 0 if self.max_line_length is not None: maxlinelen = self.max_line_length parts.append(h.encode(linesep=(self.linesep), maxlinelen=maxlinelen)) parts.append(self.linesep) return ''.join(parts)
def get_payload(self, i=None, decode=False): """Return a reference to the payload. The payload will either be a list object or a string. If you mutate the list object, you modify the message's payload in place. Optional i returns that index into the payload. Optional decode is a flag indicating whether the payload should be decoded or not, according to the Content-Transfer-Encoding header (default is False). When True and the message is not a multipart, the payload will be decoded if this header's value is `quoted-printable' or `base64'. If some other encoding is used, or the header is missing, or if the payload has bogus data (i.e. bogus base64 or uuencoded data), the payload is returned as-is. If the message is a multipart and the decode flag is True, then None is returned. """ # Here is the logic table for this code, based on the email5.0.0 code: # i decode is_multipart result # ------ ------ ------------ ------------------------------ # None True True None # i True True None # None False True _payload (a list) # i False True _payload element i (a Message) # i False False error (not a list) # i True False error (not a list) # None False False _payload # None True False _payload decoded (bytes) # Note that Barry planned to factor out the 'decode' case, but that # isn't so easy now that we handle the 8 bit data, which needs to be # converted in both the decode and non-decode path. if self.is_multipart(): if decode: return None if i is None: return self._payload else: return self._payload[i] # For backward compatibility, Use isinstance and this error message # instead of the more logical is_multipart test. if i is not None and not isinstance(self._payload, list): raise TypeError('Expected list, got %s' % type(self._payload)) payload = self._payload # cte might be a Header, so for now stringify it. cte = str(self.get('content-transfer-encoding', '')).lower() # payload may be bytes here. if isinstance(payload, str): if utils._has_surrogates(payload): bpayload = payload.encode('ascii', 'surrogateescape') if not decode: try: payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace') except LookupError: payload = bpayload.decode('ascii', 'replace') elif decode: try: bpayload = payload.encode('ascii') except UnicodeError: # This won't happen for RFC compliant messages (messages # containing only ASCII codepoints in the unicode input). # If it does happen, turn the string into bytes in a way # guaranteed not to fail. bpayload = payload.encode('raw-unicode-escape') if not decode: return payload if cte == 'quoted-printable': return utils._qdecode(bpayload) elif cte == 'base64': # XXX: this is a bit of a hack; decode_b should probably be factored # out somewhere, but I haven't figured out where yet. value, defects = decode_b(b''.join(bpayload.splitlines())) for defect in defects: self.policy.handle_defect(self, defect) return value elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): in_file = BytesIO(bpayload) out_file = BytesIO() try: uu.decode(in_file, out_file, quiet=True) return out_file.getvalue() except uu.Error: # Some decoding problem return bpayload if isinstance(payload, str): return bpayload return payload
def test_as_string_unicode_surrogates(self): with path('mailman.email.tests.data', 'bad_email_4.eml') as email_path: with open(str(email_path), 'rb') as fp: msg = message_from_binary_file(fp, Message) self.assertFalse(_has_surrogates(msg.as_string()))
def get_payload(self, i=None, decode=False): """Return a reference to the payload. The payload will either be a list object or a string. If you mutate the list object, you modify the message's payload in place. Optional i returns that index into the payload. Optional decode is a flag indicating whether the payload should be decoded or not, according to the Content-Transfer-Encoding header (default is False). When True and the message is not a multipart, the payload will be decoded if this header's value is `quoted-printable' or `base64'. If some other encoding is used, or the header is missing, or if the payload has bogus data (i.e. bogus base64 or uuencoded data), the payload is returned as-is. If the message is a multipart and the decode flag is True, then None is returned. """ if self.is_multipart(): if decode: return None if i is None: return self._payload else: return self._payload[i] if i is not None and not isinstance(self._payload, list): raise TypeError('Expected list, got %s' % type(self._payload)) payload = self._payload cte = str(self.get('content-transfer-encoding', '')).lower() if isinstance(payload, str): if utils._has_surrogates(payload): bpayload = payload.encode('ascii', 'surrogateescape') if not decode: try: payload = bpayload.decode( self.get_param('charset', 'ascii'), 'replace') except LookupError: payload = bpayload.decode('ascii', 'replace') elif decode: try: bpayload = payload.encode('ascii') except UnicodeError: bpayload = payload.encode('raw-unicode-escape') if not decode: return payload if cte == 'quoted-printable': return quopri.decodestring(bpayload) elif cte == 'base64': value, defects = decode_b(b''.join(bpayload.splitlines())) for defect in defects: self.policy.handle_defect(self, defect) return value elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): in_file = BytesIO(bpayload) out_file = BytesIO() try: uu.decode(in_file, out_file, quiet=True) return out_file.getvalue() except uu.Error: return bpayload if isinstance(payload, str): return bpayload return payload
def get_payload(self, i=None, decode=False): """Return a reference to the payload. The payload will either be a list object or a string. If you mutate the list object, you modify the message's payload in place. Optional i returns that index into the payload. Optional decode is a flag indicating whether the payload should be decoded or not, according to the Content-Transfer-Encoding header (default is False). When True and the message is not a multipart, the payload will be decoded if this header's value is `quoted-printable' or `base64'. If some other encoding is used, or the header is missing, or if the payload has bogus data (i.e. bogus base64 or uuencoded data), the payload is returned as-is. If the message is a multipart and the decode flag is True, then None is returned. """ # Here is the logic table for this code, based on the email5.0.0 code: # i decode is_multipart result # ------ ------ ------------ ------------------------------ # None True True None # i True True None # None False True _payload (a list) # i False True _payload element i (a Message) # i False False error (not a list) # i True False error (not a list) # None False False _payload # None True False _payload decoded (bytes) # Note that Barry planned to factor out the 'decode' case, but that # isn't so easy now that we handle the 8 bit data, which needs to be # converted in both the decode and non-decode path. if self.is_multipart(): if decode: return None if i is None: return self._payload else: return self._payload[i] # For backward compatibility, Use isinstance and this error message # instead of the more logical is_multipart test. if i is not None and not isinstance(self._payload, list): raise TypeError('Expected list, got %s' % type(self._payload)) payload = self._payload # cte might be a Header, so for now stringify it. cte = str(self.get('content-transfer-encoding', '')).lower() # payload may be bytes here. if isinstance(payload, str): if utils._has_surrogates(payload): bpayload = payload.encode('ascii', 'surrogateescape') if not decode: try: payload = bpayload.decode( self.get_param('charset', 'ascii'), 'replace') except LookupError: payload = bpayload.decode('ascii', 'replace') elif decode: try: bpayload = payload.encode('ascii') except UnicodeError: # This won't happen for RFC compliant messages (messages # containing only ASCII codepoints in the unicode input). # If it does happen, turn the string into bytes in a way # guaranteed not to fail. bpayload = payload.encode('raw-unicode-escape') if not decode: return payload if cte == 'quoted-printable': return utils._qdecode(bpayload) elif cte == 'base64': # XXX: this is a bit of a hack; decode_b should probably be factored # out somewhere, but I haven't figured out where yet. value, defects = decode_b(b''.join(bpayload.splitlines())) for defect in defects: self.policy.handle_defect(self, defect) return value elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): in_file = BytesIO(bpayload) out_file = BytesIO() try: uu.decode(in_file, out_file, quiet=True) return out_file.getvalue() except uu.Error: # Some decoding problem return bpayload if isinstance(payload, str): return bpayload return payload
def _sanitize_header(self, name, value): if not isinstance(value, str): return value if _has_surrogates(value): return header.Header(value, charset=_charset.UNKNOWN8BIT, header_name=name) return value