def test_header_parse_failure(self, mock_parsestr, mock_decode_header): """Ensure get_patch_name() handles a parsing failure.""" mock_parsestr.return_value = {'Subject': "Testing"} mock_decode_header.side_effect = HeaderParseError('Fail') result = skt.misc.get_patch_name('') self.assertEqual('<SUBJECT ENCODING INVALID>', result)
def get_header_value(self, header_name): raw_value = self.get(header_name) logger.debug(self.my_log("primitive header: %s " % raw_value)) if raw_value is None: raise HeaderParseError("header [%s] not exist" % header_name) else: return decode_header_value(raw_value)
def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'): """Encode a message header into an RFC-compliant format. There are many issues involved in converting a given string for use in an email header. Only certain character sets are readable in most email clients, and as header strings can only contain a subset of 7-bit ASCII, care must be taken to properly convert and encode (with Base64 or quoted-printable) header strings. In addition, there is a 75-character length limit on any given encoded header field, so line-wrapping must be performed, even with double-byte character sets. This method will do its best to convert the string to the correct character set used in email, and encode and line wrap it safely with the appropriate scheme for that character set. If the given charset is not known or an error occurs during conversion, this function will return the header untouched. Optional splitchars is a string containing characters to split long ASCII lines on, in rough support of RFC 2822's `highest level syntactic breaks'. This doesn't affect RFC 2047 encoded lines. Optional linesep is a string to be used to separate the lines of the value. The default value is the most useful for typical Python applications, but it can be set to \r\n to produce RFC-compliant line separators when needed. """ self._normalize() if maxlinelen is None: maxlinelen = self._maxlinelen # A maxlinelen of 0 means don't wrap. For all practical purposes, # choosing a huge number here accomplishes that and makes the # _ValueFormatter algorithm much simpler. if maxlinelen == 0: maxlinelen = 1000000 formatter = _ValueFormatter(self._headerlen, maxlinelen, self._continuation_ws, splitchars) for string, charset in self._chunks: lines = string.splitlines() formatter.feed(lines[0], charset) for line in lines[1:]: formatter.newline() if charset.header_encoding is not None: formatter.feed(self._continuation_ws, USASCII) line = ' ' + line.lstrip() formatter.feed(line, charset) if len(lines) > 1: formatter.newline() formatter.add_transition() value = formatter._str(linesep) if _embeded_header.search(value): raise HeaderParseError("header value appears to contain " "an embedded header: {!r}".format(value)) return value
def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'): self._normalize() if maxlinelen is None: maxlinelen = self._maxlinelen if maxlinelen == 0: maxlinelen = 1000000 formatter = _ValueFormatter(self._headerlen, maxlinelen, self._continuation_ws, splitchars) lastcs = None hasspace = lastspace = None for (string, charset) in self._chunks: if hasspace is not None: hasspace = string and self._nonctext(string[0]) import sys if lastcs not in (None, 'us-ascii'): if not hasspace or charset not in (None, 'us-ascii'): formatter.add_transition() if charset not in (None, 'us-ascii') and not lastspace: formatter.add_transition() elif charset not in (None, 'us-ascii') and not lastspace: formatter.add_transition() lastspace = string and self._nonctext(string[-1]) lastcs = charset hasspace = False lines = string.splitlines() if lines: formatter.feed('', lines[0], charset) else: formatter.feed('', '', charset) for line in lines[1:]: formatter.newline() if charset.header_encoding is not None: formatter.feed(self._continuation_ws, ' ' + line.lstrip(), charset) else: sline = line.lstrip() fws = line[:len(line) - len(sline)] formatter.feed(fws, sline, charset) while len(lines) > 1: formatter.newline() if self._chunks: formatter.add_transition() value = formatter._str(linesep) if _embeded_header.search(value): raise HeaderParseError( 'header value appears to contain an embedded header: {!r}'. format(value)) return value
def encode(self, splitchars=';, '): newchunks = [] maxlinelen = self._firstlinelen lastlen = 0 for s, charset in self._chunks: targetlen = maxlinelen - lastlen - 1 if targetlen < charset.encoded_header_len(''): targetlen = maxlinelen newchunks += self._split(s, charset, targetlen, splitchars) lastchunk, lastcharset = newchunks[-1] lastlen = lastcharset.encoded_header_len(lastchunk) value = self._encode_chunks(newchunks, maxlinelen) if _embeded_header.search(value): raise HeaderParseError( 'header value appears to contain an embedded header: {!r}'. format(value)) return value
def encode(self, splitchars=';, '): """Encode a message header into an RFC-compliant format. There are many issues involved in converting a given string for use in an email header. Only certain character sets are readable in most email clients, and as header strings can only contain a subset of 7-bit ASCII, care must be taken to properly convert and encode (with Base64 or quoted-printable) header strings. In addition, there is a 75-character length limit on any given encoded header field, so line-wrapping must be performed, even with double-byte character sets. This method will do its best to convert the string to the correct character set used in email, and encode and line wrap it safely with the appropriate scheme for that character set. If the given charset is not known or an error occurs during conversion, this function will return the header untouched. Optional splitchars is a string containing characters to split long ASCII lines on, in rough support of RFC 2822's `highest level syntactic breaks'. This doesn't affect RFC 2047 encoded lines. """ newchunks = [] maxlinelen = self._firstlinelen lastlen = 0 for s, charset in self._chunks: # The first bit of the next chunk should be just long enough to # fill the next line. Don't forget the space separating the # encoded words. targetlen = maxlinelen - lastlen - 1 if targetlen < charset.encoded_header_len(''): # Stick it on the next line targetlen = maxlinelen newchunks += self._split(s, charset, targetlen, splitchars) lastchunk, lastcharset = newchunks[-1] lastlen = lastcharset.encoded_header_len(lastchunk) value = self._encode_chunks(newchunks, maxlinelen) if _embeded_header.search(value): raise HeaderParseError("header value appears to contain " "an embedded header: {!r}".format(value)) return value
def utf8(s, reject_newlines=True): if reject_newlines and '\n' in s: raise HeaderParseError( 'header value contains unexpected newline: {!r}'.format(s)) return s.encode('utf8') if isinstance(s, unicode) else s
def decode_header(header): """Decode a message header value without converting charset. Returns a list of (string, charset) pairs containing each of the decoded parts of the header. Charset is None for non-encoded parts of the header, otherwise a lower-case string containing the name of the character set specified in the encoded string. header may be a string that may or may not contain RFC2047 encoded words, or it may be a Header object. An email.errors.HeaderParseError may be raised when certain decoding error occurs (e.g. a base64 decoding exception). """ # If it is a Header object, we can just return the encoded chunks. if hasattr(header, '_chunks'): return [(_charset._encode(string, str(charset)), str(charset)) for string, charset in header._chunks] # If no encoding, just return the header with no charset. if not ecre.search(header): return [(header, None)] # First step is to parse all the encoded parts into triplets of the form # (encoded_string, encoding, charset). For unencoded strings, the last # two parts will be None. words = [] for line in header.splitlines(): parts = ecre.split(line) first = True while parts: unencoded = parts.pop(0) if first: unencoded = unencoded.lstrip() first = False if unencoded: words.append((unencoded, None, None)) if parts: charset = parts.pop(0).lower() encoding = parts.pop(0).lower() encoded = parts.pop(0) words.append((encoded, encoding, charset)) # Now loop over words and remove words that consist of whitespace # between two encoded strings. droplist = [] for n, w in enumerate(words): if n > 1 and w[1] and words[n - 2][1] and words[n - 1][0].isspace(): droplist.append(n - 1) for d in reversed(droplist): del words[d] # The next step is to decode each encoded word by applying the reverse # base64 or quopri transformation. decoded_words is now a list of the # form (decoded_word, charset). decoded_words = [] for encoded_string, encoding, charset in words: if encoding is None: # This is an unencoded word. decoded_words.append((encoded_string, charset)) elif encoding == 'q': word = email.quoprimime.header_decode(encoded_string) decoded_words.append((word, charset)) elif encoding == 'b': paderr = len( encoded_string) % 4 # Postel's law: add missing padding if paderr: encoded_string += '==='[:4 - paderr] try: word = email.base64mime.decode(encoded_string) except binascii.Error: raise HeaderParseError('Base64 decoding error') else: decoded_words.append((word, charset)) else: raise AssertionError('Unexpected encoding: ' + encoding) # Now convert all words to bytes and collapse consecutive runs of # similarly encoded words. collapsed = [] last_word = last_charset = None for word, charset in decoded_words: if isinstance(word, str): word = bytes(word, 'raw-unicode-escape') if last_word is None: last_word = word last_charset = charset elif charset != last_charset: collapsed.append((last_word, last_charset)) last_word = word last_charset = charset elif last_charset is None: last_word += BSPACE + word else: last_word += word collapsed.append((last_word, last_charset)) return collapsed
def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'): r"""Encode a message header into an RFC-compliant format. There are many issues involved in converting a given string for use in an email header. Only certain character sets are readable in most email clients, and as header strings can only contain a subset of 7-bit ASCII, care must be taken to properly convert and encode (with Base64 or quoted-printable) header strings. In addition, there is a 75-character length limit on any given encoded header field, so line-wrapping must be performed, even with double-byte character sets. Optional maxlinelen specifies the maximum length of each generated line, exclusive of the linesep string. Individual lines may be longer than maxlinelen if a folding point cannot be found. The first line will be shorter by the length of the header name plus ": " if a header name was specified at Header construction time. The default value for maxlinelen is determined at header construction time. Optional splitchars is a string containing characters which should be given extra weight by the splitting algorithm during normal header wrapping. This is in very rough support of RFC 2822's `higher level syntactic breaks': split points preceded by a splitchar are preferred during line splitting, with the characters preferred in the order in which they appear in the string. Space and tab may be included in the string to indicate whether preference should be given to one over the other as a split point when other split chars do not appear in the line being split. Splitchars does not affect RFC 2047 encoded lines. Optional linesep is a string to be used to separate the lines of the value. The default value is the most useful for typical Python applications, but it can be set to \r\n to produce RFC-compliant line separators when needed. """ self._normalize() if maxlinelen is None: maxlinelen = self._maxlinelen # A maxlinelen of 0 means don't wrap. For all practical purposes, # choosing a huge number here accomplishes that and makes the # _ValueFormatter algorithm much simpler. if maxlinelen == 0: maxlinelen = 1000000 formatter = _ValueFormatter(self._headerlen, maxlinelen, self._continuation_ws, splitchars) lastcs = None hasspace = lastspace = None for string, charset in self._chunks: if hasspace is not None: hasspace = string and self._nonctext(string[0]) if lastcs not in (None, 'us-ascii'): if not hasspace or charset not in (None, 'us-ascii'): formatter.add_transition() elif charset not in (None, 'us-ascii') and not lastspace: formatter.add_transition() lastspace = string and self._nonctext(string[-1]) lastcs = charset hasspace = False lines = string.splitlines() if lines: formatter.feed('', lines[0], charset) else: formatter.feed('', '', charset) for line in lines[1:]: formatter.newline() if charset.header_encoding is not None: formatter.feed(self._continuation_ws, ' ' + line.lstrip(), charset) else: sline = line.lstrip() fws = line[:len(line) - len(sline)] formatter.feed(fws, sline, charset) if len(lines) > 1: formatter.newline() if self._chunks: formatter.add_transition() value = formatter._str(linesep) if _embeded_header.search(value): raise HeaderParseError("header value appears to contain " "an embedded header: {!r}".format(value)) return value
def decode_header(header): """Decode a message header value without converting charset. Returns a list of (string, charset) pairs containing each of the decoded parts of the header. Charset is None for non-encoded parts of the header, otherwise a lower-case string containing the name of the character set specified in the encoded string. An email.Errors.HeaderParseError may be raised when certain decoding error occurs (e.g. a base64 decoding exception). """ # If no encoding, just return the header with no charset. if not ecre.search(header): return [(header, None)] # First step is to parse all the encoded parts into triplets of the form # (encoded_string, encoding, charset). For unencoded strings, the last # two parts will be None. words = [] for line in header.splitlines(): parts = ecre.split(line) while parts: unencoded = parts.pop(0).strip() if unencoded: words.append((unencoded, None, None)) if parts: charset = parts.pop(0).lower() encoding = parts.pop(0).lower() encoded = parts.pop(0) words.append((encoded, encoding, charset)) # The next step is to decode each encoded word by applying the reverse # base64 or quopri transformation. decoded_words is now a list of the # form (decoded_word, charset). decoded_words = [] for encoded_string, encoding, charset in words: if encoding is None: # This is an unencoded word. decoded_words.append((encoded_string, charset)) elif encoding == 'q': word = email.quoprimime.header_decode(encoded_string) decoded_words.append((word, charset)) elif encoding == 'b': try: word = email.base64mime.decode(encoded_string) except binascii.Error: raise HeaderParseError('Base64 decoding error') else: decoded_words.append((word, charset)) else: raise AssertionError('Unexpected encoding: ' + encoding) # Now convert all words to bytes and collapse consecutive runs of # similarly encoded words. collapsed = [] last_word = last_charset = None for word, charset in decoded_words: if isinstance(word, str): word = bytes(word, 'raw-unicode-escape') if last_word is None: last_word = word last_charset = charset elif charset != last_charset: collapsed.append((last_word, last_charset)) last_word = word last_charset = charset elif last_charset is None: last_word += BSPACE + word else: last_word += word collapsed.append((last_word, last_charset)) return collapsed
def decode_header(header): """Decode a message header value without converting charset. Returns a list of (string, charset) pairs containing each of the decoded parts of the header. Charset is None for non-encoded parts of the header, otherwise a lower-case string containing the name of the character set specified in the encoded string. header may be a string that may or may not contain RFC2047 encoded words, or it may be a Header object. An email.errors.HeaderParseError may be raised when certain decoding error occurs (e.g. a base64 decoding exception). """ if hasattr(header, '_chunks'): return [(_charset._encode(string, str(charset)), str(charset)) for string, charset in header._chunks] elif not ecre.search(header): return [(header, None)] else: words = [] for line in header.splitlines(): parts = ecre.split(line) first = True while parts: unencoded = parts.pop(0) if first: unencoded = unencoded.lstrip() first = False if unencoded: words.append((unencoded, None, None)) if parts: charset = parts.pop(0).lower() encoding = parts.pop(0).lower() encoded = parts.pop(0) words.append((encoded, encoding, charset)) droplist = [] for n, w in enumerate(words): if n > 1: if w[1]: if words[(n - 2)][1]: if words[(n - 1)][0].isspace(): droplist.append(n - 1) for d in reversed(droplist): del words[d] decoded_words = [] for encoded_string, encoding, charset in words: if encoding is None: decoded_words.append((encoded_string, charset)) elif encoding == 'q': word = email.quoprimime.header_decode(encoded_string) decoded_words.append((word, charset)) elif encoding == 'b': paderr = len(encoded_string) % 4 if paderr: encoded_string += '==='[:4 - paderr] try: word = email.base64mime.decode(encoded_string) except binascii.Error: raise HeaderParseError('Base64 decoding error') else: decoded_words.append((word, charset)) else: raise AssertionError('Unexpected encoding: ' + encoding) collapsed = [] last_word = last_charset = None for word, charset in decoded_words: if isinstance(word, str): word = bytes(word, 'raw-unicode-escape') if last_word is None: last_word = word last_charset = charset elif charset != last_charset: collapsed.append((last_word, last_charset)) last_word = word last_charset = charset elif last_charset is None: last_word += BSPACE + word else: last_word += word collapsed.append((last_word, last_charset)) return collapsed
def decode_header(header): if hasattr(header, '_chunks'): return [(_charset._encode(string, str(charset)), str(charset)) for (string, charset) in header._chunks] if not ecre.search(header): return [(header, None)] words = [] for line in header.splitlines(): parts = ecre.split(line) first = True while parts: unencoded = parts.pop(0) if first: unencoded = unencoded.lstrip() first = False if unencoded: words.append((unencoded, None, None)) while parts: charset = parts.pop(0).lower() encoding = parts.pop(0).lower() encoded = parts.pop(0) words.append((encoded, encoding, charset)) continue import sys droplist = [] for (n, w) in enumerate(words): while n > 1 and (w[1] and words[n - 2][1]) and words[n - 1][0].isspace(): droplist.append(n - 1) for d in reversed(droplist): del words[d] decoded_words = [] for (encoded_string, encoding, charset) in words: if encoding is None: decoded_words.append((encoded_string, charset)) elif encoding == 'q': word = email.quoprimime.header_decode(encoded_string) decoded_words.append((word, charset)) elif encoding == 'b': paderr = len(encoded_string) % 4 if paderr: encoded_string += '==='[:4 - paderr] try: word = email.base64mime.decode(encoded_string) except binascii.Error: raise HeaderParseError('Base64 decoding error') decoded_words.append((word, charset)) else: raise AssertionError('Unexpected encoding: ' + encoding) collapsed = [] last_word = last_charset = None for (word, charset) in decoded_words: if isinstance(word, str): word = bytes(word, 'raw-unicode-escape') if last_word is None: last_word = word last_charset = charset elif charset != last_charset: collapsed.append((last_word, last_charset)) last_word = word last_charset = charset elif last_charset is None: last_word += BSPACE + word else: last_word += word collapsed.append((last_word, last_charset)) return collapsed