Example #1
0
    def test_header_parse_failure(self, mock_parsestr, mock_decode_header):
        """Ensure get_patch_name() handles a parsing failure."""
        mock_parsestr.return_value = {'Subject': "Testing"}
        mock_decode_header.side_effect = HeaderParseError('Fail')
        result = skt.misc.get_patch_name('')

        self.assertEqual('<SUBJECT ENCODING INVALID>', result)
Example #2
0
 def get_header_value(self, header_name):
     raw_value = self.get(header_name)
     logger.debug(self.my_log("primitive header: %s " % raw_value))
     if raw_value is None:
         raise HeaderParseError("header [%s] not exist" % header_name)
     else:
         return decode_header_value(raw_value)
    def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
        """Encode a message header into an RFC-compliant format.

        There are many issues involved in converting a given string for use in
        an email header.  Only certain character sets are readable in most
        email clients, and as header strings can only contain a subset of
        7-bit ASCII, care must be taken to properly convert and encode (with
        Base64 or quoted-printable) header strings.  In addition, there is a
        75-character length limit on any given encoded header field, so
        line-wrapping must be performed, even with double-byte character sets.

        This method will do its best to convert the string to the correct
        character set used in email, and encode and line wrap it safely with
        the appropriate scheme for that character set.

        If the given charset is not known or an error occurs during
        conversion, this function will return the header untouched.

        Optional splitchars is a string containing characters to split long
        ASCII lines on, in rough support of RFC 2822's `highest level
        syntactic breaks'.  This doesn't affect RFC 2047 encoded lines.

        Optional linesep is a string to be used to separate the lines of
        the value.  The default value is the most useful for typical
        Python applications, but it can be set to \r\n to produce RFC-compliant
        line separators when needed.
        """
        self._normalize()
        if maxlinelen is None:
            maxlinelen = self._maxlinelen
        # A maxlinelen of 0 means don't wrap.  For all practical purposes,
        # choosing a huge number here accomplishes that and makes the
        # _ValueFormatter algorithm much simpler.
        if maxlinelen == 0:
            maxlinelen = 1000000
        formatter = _ValueFormatter(self._headerlen, maxlinelen,
                                    self._continuation_ws, splitchars)
        for string, charset in self._chunks:
            lines = string.splitlines()
            formatter.feed(lines[0], charset)
            for line in lines[1:]:
                formatter.newline()
                if charset.header_encoding is not None:
                    formatter.feed(self._continuation_ws, USASCII)
                    line = ' ' + line.lstrip()
                formatter.feed(line, charset)
            if len(lines) > 1:
                formatter.newline()
            formatter.add_transition()
        value = formatter._str(linesep)
        if _embeded_header.search(value):
            raise HeaderParseError("header value appears to contain "
                                   "an embedded header: {!r}".format(value))
        return value
 def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
     self._normalize()
     if maxlinelen is None:
         maxlinelen = self._maxlinelen
     if maxlinelen == 0:
         maxlinelen = 1000000
     formatter = _ValueFormatter(self._headerlen, maxlinelen,
                                 self._continuation_ws, splitchars)
     lastcs = None
     hasspace = lastspace = None
     for (string, charset) in self._chunks:
         if hasspace is not None:
             hasspace = string and self._nonctext(string[0])
             import sys
             if lastcs not in (None, 'us-ascii'):
                 if not hasspace or charset not in (None, 'us-ascii'):
                     formatter.add_transition()
                     if charset not in (None, 'us-ascii') and not lastspace:
                         formatter.add_transition()
             elif charset not in (None, 'us-ascii') and not lastspace:
                 formatter.add_transition()
         lastspace = string and self._nonctext(string[-1])
         lastcs = charset
         hasspace = False
         lines = string.splitlines()
         if lines:
             formatter.feed('', lines[0], charset)
         else:
             formatter.feed('', '', charset)
         for line in lines[1:]:
             formatter.newline()
             if charset.header_encoding is not None:
                 formatter.feed(self._continuation_ws, ' ' + line.lstrip(),
                                charset)
             else:
                 sline = line.lstrip()
                 fws = line[:len(line) - len(sline)]
                 formatter.feed(fws, sline, charset)
         while len(lines) > 1:
             formatter.newline()
     if self._chunks:
         formatter.add_transition()
     value = formatter._str(linesep)
     if _embeded_header.search(value):
         raise HeaderParseError(
             'header value appears to contain an embedded header: {!r}'.
             format(value))
     return value
Example #5
0
    def encode(self, splitchars=';, '):
        newchunks = []
        maxlinelen = self._firstlinelen
        lastlen = 0
        for s, charset in self._chunks:
            targetlen = maxlinelen - lastlen - 1
            if targetlen < charset.encoded_header_len(''):
                targetlen = maxlinelen
            newchunks += self._split(s, charset, targetlen, splitchars)
            lastchunk, lastcharset = newchunks[-1]
            lastlen = lastcharset.encoded_header_len(lastchunk)

        value = self._encode_chunks(newchunks, maxlinelen)
        if _embeded_header.search(value):
            raise HeaderParseError(
                'header value appears to contain an embedded header: {!r}'.
                format(value))
        return value
Example #6
0
    def encode(self, splitchars=';, '):
        """Encode a message header into an RFC-compliant format.

        There are many issues involved in converting a given string for use in
        an email header.  Only certain character sets are readable in most
        email clients, and as header strings can only contain a subset of
        7-bit ASCII, care must be taken to properly convert and encode (with
        Base64 or quoted-printable) header strings.  In addition, there is a
        75-character length limit on any given encoded header field, so
        line-wrapping must be performed, even with double-byte character sets.

        This method will do its best to convert the string to the correct
        character set used in email, and encode and line wrap it safely with
        the appropriate scheme for that character set.

        If the given charset is not known or an error occurs during
        conversion, this function will return the header untouched.

        Optional splitchars is a string containing characters to split long
        ASCII lines on, in rough support of RFC 2822's `highest level
        syntactic breaks'.  This doesn't affect RFC 2047 encoded lines.
        """
        newchunks = []
        maxlinelen = self._firstlinelen
        lastlen = 0
        for s, charset in self._chunks:
            # The first bit of the next chunk should be just long enough to
            # fill the next line.  Don't forget the space separating the
            # encoded words.
            targetlen = maxlinelen - lastlen - 1
            if targetlen < charset.encoded_header_len(''):
                # Stick it on the next line
                targetlen = maxlinelen
            newchunks += self._split(s, charset, targetlen, splitchars)
            lastchunk, lastcharset = newchunks[-1]
            lastlen = lastcharset.encoded_header_len(lastchunk)
        value = self._encode_chunks(newchunks, maxlinelen)
        if _embeded_header.search(value):
            raise HeaderParseError("header value appears to contain "
                "an embedded header: {!r}".format(value))
        return value
Example #7
0
 def utf8(s, reject_newlines=True):
     if reject_newlines and '\n' in s:
         raise HeaderParseError(
             'header value contains unexpected newline: {!r}'.format(s))
     return s.encode('utf8') if isinstance(s, unicode) else s
Example #8
0
def decode_header(header):
    """Decode a message header value without converting charset.

    Returns a list of (string, charset) pairs containing each of the decoded
    parts of the header.  Charset is None for non-encoded parts of the header,
    otherwise a lower-case string containing the name of the character set
    specified in the encoded string.

    header may be a string that may or may not contain RFC2047 encoded words,
    or it may be a Header object.

    An email.errors.HeaderParseError may be raised when certain decoding error
    occurs (e.g. a base64 decoding exception).
    """
    # If it is a Header object, we can just return the encoded chunks.
    if hasattr(header, '_chunks'):
        return [(_charset._encode(string, str(charset)), str(charset))
                for string, charset in header._chunks]
    # If no encoding, just return the header with no charset.
    if not ecre.search(header):
        return [(header, None)]
    # First step is to parse all the encoded parts into triplets of the form
    # (encoded_string, encoding, charset).  For unencoded strings, the last
    # two parts will be None.
    words = []
    for line in header.splitlines():
        parts = ecre.split(line)
        first = True
        while parts:
            unencoded = parts.pop(0)
            if first:
                unencoded = unencoded.lstrip()
                first = False
            if unencoded:
                words.append((unencoded, None, None))
            if parts:
                charset = parts.pop(0).lower()
                encoding = parts.pop(0).lower()
                encoded = parts.pop(0)
                words.append((encoded, encoding, charset))
    # Now loop over words and remove words that consist of whitespace
    # between two encoded strings.
    droplist = []
    for n, w in enumerate(words):
        if n > 1 and w[1] and words[n - 2][1] and words[n - 1][0].isspace():
            droplist.append(n - 1)
    for d in reversed(droplist):
        del words[d]

    # The next step is to decode each encoded word by applying the reverse
    # base64 or quopri transformation.  decoded_words is now a list of the
    # form (decoded_word, charset).
    decoded_words = []
    for encoded_string, encoding, charset in words:
        if encoding is None:
            # This is an unencoded word.
            decoded_words.append((encoded_string, charset))
        elif encoding == 'q':
            word = email.quoprimime.header_decode(encoded_string)
            decoded_words.append((word, charset))
        elif encoding == 'b':
            paderr = len(
                encoded_string) % 4  # Postel's law: add missing padding
            if paderr:
                encoded_string += '==='[:4 - paderr]
            try:
                word = email.base64mime.decode(encoded_string)
            except binascii.Error:
                raise HeaderParseError('Base64 decoding error')
            else:
                decoded_words.append((word, charset))
        else:
            raise AssertionError('Unexpected encoding: ' + encoding)
    # Now convert all words to bytes and collapse consecutive runs of
    # similarly encoded words.
    collapsed = []
    last_word = last_charset = None
    for word, charset in decoded_words:
        if isinstance(word, str):
            word = bytes(word, 'raw-unicode-escape')
        if last_word is None:
            last_word = word
            last_charset = charset
        elif charset != last_charset:
            collapsed.append((last_word, last_charset))
            last_word = word
            last_charset = charset
        elif last_charset is None:
            last_word += BSPACE + word
        else:
            last_word += word
    collapsed.append((last_word, last_charset))
    return collapsed
Example #9
0
    def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
        r"""Encode a message header into an RFC-compliant format.

        There are many issues involved in converting a given string for use in
        an email header.  Only certain character sets are readable in most
        email clients, and as header strings can only contain a subset of
        7-bit ASCII, care must be taken to properly convert and encode (with
        Base64 or quoted-printable) header strings.  In addition, there is a
        75-character length limit on any given encoded header field, so
        line-wrapping must be performed, even with double-byte character sets.

        Optional maxlinelen specifies the maximum length of each generated
        line, exclusive of the linesep string.  Individual lines may be longer
        than maxlinelen if a folding point cannot be found.  The first line
        will be shorter by the length of the header name plus ": " if a header
        name was specified at Header construction time.  The default value for
        maxlinelen is determined at header construction time.

        Optional splitchars is a string containing characters which should be
        given extra weight by the splitting algorithm during normal header
        wrapping.  This is in very rough support of RFC 2822's `higher level
        syntactic breaks':  split points preceded by a splitchar are preferred
        during line splitting, with the characters preferred in the order in
        which they appear in the string.  Space and tab may be included in the
        string to indicate whether preference should be given to one over the
        other as a split point when other split chars do not appear in the line
        being split.  Splitchars does not affect RFC 2047 encoded lines.

        Optional linesep is a string to be used to separate the lines of
        the value.  The default value is the most useful for typical
        Python applications, but it can be set to \r\n to produce RFC-compliant
        line separators when needed.
        """
        self._normalize()
        if maxlinelen is None:
            maxlinelen = self._maxlinelen
        # A maxlinelen of 0 means don't wrap.  For all practical purposes,
        # choosing a huge number here accomplishes that and makes the
        # _ValueFormatter algorithm much simpler.
        if maxlinelen == 0:
            maxlinelen = 1000000
        formatter = _ValueFormatter(self._headerlen, maxlinelen,
                                    self._continuation_ws, splitchars)
        lastcs = None
        hasspace = lastspace = None
        for string, charset in self._chunks:
            if hasspace is not None:
                hasspace = string and self._nonctext(string[0])
                if lastcs not in (None, 'us-ascii'):
                    if not hasspace or charset not in (None, 'us-ascii'):
                        formatter.add_transition()
                elif charset not in (None, 'us-ascii') and not lastspace:
                    formatter.add_transition()
            lastspace = string and self._nonctext(string[-1])
            lastcs = charset
            hasspace = False
            lines = string.splitlines()
            if lines:
                formatter.feed('', lines[0], charset)
            else:
                formatter.feed('', '', charset)
            for line in lines[1:]:
                formatter.newline()
                if charset.header_encoding is not None:
                    formatter.feed(self._continuation_ws, ' ' + line.lstrip(),
                                   charset)
                else:
                    sline = line.lstrip()
                    fws = line[:len(line) - len(sline)]
                    formatter.feed(fws, sline, charset)
            if len(lines) > 1:
                formatter.newline()
        if self._chunks:
            formatter.add_transition()
        value = formatter._str(linesep)
        if _embeded_header.search(value):
            raise HeaderParseError("header value appears to contain "
                                   "an embedded header: {!r}".format(value))
        return value
Example #10
0
def decode_header(header):
    """Decode a message header value without converting charset.

    Returns a list of (string, charset) pairs containing each of the decoded
    parts of the header.  Charset is None for non-encoded parts of the header,
    otherwise a lower-case string containing the name of the character set
    specified in the encoded string.

    An email.Errors.HeaderParseError may be raised when certain decoding error
    occurs (e.g. a base64 decoding exception).
    """
    # If no encoding, just return the header with no charset.
    if not ecre.search(header):
        return [(header, None)]
    # First step is to parse all the encoded parts into triplets of the form
    # (encoded_string, encoding, charset).  For unencoded strings, the last
    # two parts will be None.
    words = []
    for line in header.splitlines():
        parts = ecre.split(line)
        while parts:
            unencoded = parts.pop(0).strip()
            if unencoded:
                words.append((unencoded, None, None))
            if parts:
                charset = parts.pop(0).lower()
                encoding = parts.pop(0).lower()
                encoded = parts.pop(0)
                words.append((encoded, encoding, charset))
    # The next step is to decode each encoded word by applying the reverse
    # base64 or quopri transformation.  decoded_words is now a list of the
    # form (decoded_word, charset).
    decoded_words = []
    for encoded_string, encoding, charset in words:
        if encoding is None:
            # This is an unencoded word.
            decoded_words.append((encoded_string, charset))
        elif encoding == 'q':
            word = email.quoprimime.header_decode(encoded_string)
            decoded_words.append((word, charset))
        elif encoding == 'b':
            try:
                word = email.base64mime.decode(encoded_string)
            except binascii.Error:
                raise HeaderParseError('Base64 decoding error')
            else:
                decoded_words.append((word, charset))
        else:
            raise AssertionError('Unexpected encoding: ' + encoding)
    # Now convert all words to bytes and collapse consecutive runs of
    # similarly encoded words.
    collapsed = []
    last_word = last_charset = None
    for word, charset in decoded_words:
        if isinstance(word, str):
            word = bytes(word, 'raw-unicode-escape')
        if last_word is None:
            last_word = word
            last_charset = charset
        elif charset != last_charset:
            collapsed.append((last_word, last_charset))
            last_word = word
            last_charset = charset
        elif last_charset is None:
            last_word += BSPACE + word
        else:
            last_word += word
    collapsed.append((last_word, last_charset))
    return collapsed
Example #11
0
def decode_header(header):
    """Decode a message header value without converting charset.

    Returns a list of (string, charset) pairs containing each of the decoded
    parts of the header.  Charset is None for non-encoded parts of the header,
    otherwise a lower-case string containing the name of the character set
    specified in the encoded string.

    header may be a string that may or may not contain RFC2047 encoded words,
    or it may be a Header object.

    An email.errors.HeaderParseError may be raised when certain decoding error
    occurs (e.g. a base64 decoding exception).
    """
    if hasattr(header, '_chunks'):
        return [(_charset._encode(string, str(charset)), str(charset))
                for string, charset in header._chunks]
    elif not ecre.search(header):
        return [(header, None)]
    else:
        words = []
        for line in header.splitlines():
            parts = ecre.split(line)
            first = True
            while parts:
                unencoded = parts.pop(0)
                if first:
                    unencoded = unencoded.lstrip()
                    first = False
                if unencoded:
                    words.append((unencoded, None, None))
                if parts:
                    charset = parts.pop(0).lower()
                    encoding = parts.pop(0).lower()
                    encoded = parts.pop(0)
                    words.append((encoded, encoding, charset))

        droplist = []
        for n, w in enumerate(words):
            if n > 1:
                if w[1]:
                    if words[(n - 2)][1]:
                        if words[(n - 1)][0].isspace():
                            droplist.append(n - 1)

        for d in reversed(droplist):
            del words[d]

        decoded_words = []
        for encoded_string, encoding, charset in words:
            if encoding is None:
                decoded_words.append((encoded_string, charset))
            elif encoding == 'q':
                word = email.quoprimime.header_decode(encoded_string)
                decoded_words.append((word, charset))
            elif encoding == 'b':
                paderr = len(encoded_string) % 4
                if paderr:
                    encoded_string += '==='[:4 - paderr]
                try:
                    word = email.base64mime.decode(encoded_string)
                except binascii.Error:
                    raise HeaderParseError('Base64 decoding error')
                else:
                    decoded_words.append((word, charset))
            else:
                raise AssertionError('Unexpected encoding: ' + encoding)

        collapsed = []
        last_word = last_charset = None
        for word, charset in decoded_words:
            if isinstance(word, str):
                word = bytes(word, 'raw-unicode-escape')
            if last_word is None:
                last_word = word
                last_charset = charset
            elif charset != last_charset:
                collapsed.append((last_word, last_charset))
                last_word = word
                last_charset = charset
            elif last_charset is None:
                last_word += BSPACE + word
            else:
                last_word += word

        collapsed.append((last_word, last_charset))
        return collapsed
Example #12
0
def decode_header(header):
    if hasattr(header, '_chunks'):
        return [(_charset._encode(string, str(charset)), str(charset))
                for (string, charset) in header._chunks]
    if not ecre.search(header):
        return [(header, None)]
    words = []
    for line in header.splitlines():
        parts = ecre.split(line)
        first = True
        while parts:
            unencoded = parts.pop(0)
            if first:
                unencoded = unencoded.lstrip()
                first = False
            if unencoded:
                words.append((unencoded, None, None))
            while parts:
                charset = parts.pop(0).lower()
                encoding = parts.pop(0).lower()
                encoded = parts.pop(0)
                words.append((encoded, encoding, charset))
                continue
    import sys
    droplist = []
    for (n, w) in enumerate(words):
        while n > 1 and (w[1]
                         and words[n - 2][1]) and words[n - 1][0].isspace():
            droplist.append(n - 1)
    for d in reversed(droplist):
        del words[d]
    decoded_words = []
    for (encoded_string, encoding, charset) in words:
        if encoding is None:
            decoded_words.append((encoded_string, charset))
        elif encoding == 'q':
            word = email.quoprimime.header_decode(encoded_string)
            decoded_words.append((word, charset))
        elif encoding == 'b':
            paderr = len(encoded_string) % 4
            if paderr:
                encoded_string += '==='[:4 - paderr]
            try:
                word = email.base64mime.decode(encoded_string)
            except binascii.Error:
                raise HeaderParseError('Base64 decoding error')
            decoded_words.append((word, charset))
        else:
            raise AssertionError('Unexpected encoding: ' + encoding)
    collapsed = []
    last_word = last_charset = None
    for (word, charset) in decoded_words:
        if isinstance(word, str):
            word = bytes(word, 'raw-unicode-escape')
        if last_word is None:
            last_word = word
            last_charset = charset
        elif charset != last_charset:
            collapsed.append((last_word, last_charset))
            last_word = word
            last_charset = charset
        elif last_charset is None:
            last_word += BSPACE + word
        else:
            last_word += word
    collapsed.append((last_word, last_charset))
    return collapsed