Ejemplo n.º 1
0
def adjust_content_type(content_type, body=None, filename=None):
    """Refine a content type using the filename and/or the body contents.

    A generic ``application/octet-stream`` type that comes with a filename
    is first looked up through ``_guess_type`` (a hit is returned right
    away, skipping any body inspection) and then through ``mimetypes``.
    Image and audio types that come with a non-empty body get their subtype
    re-detected from the payload.  If nothing more specific is found, the
    incoming ``content_type`` is returned as is.
    """
    if filename and str(content_type) == 'application/octet-stream':
        # hard-coded answers take precedence over everything else
        hardcoded = _guess_type(filename)
        if hardcoded:
            return hardcoded

        # nothing hard-coded for this name, ask the mimetypes module
        by_extension = mimetypes.guess_type(filename)[0]
        if by_extension:
            main_type, sub_type = fix_content_type(
                by_extension, default=('application', 'octet-stream'))
            content_type = ContentType(main_type, sub_type)

    if content_type.main == 'image':
        if body:
            # only the first 32 characters/bytes are handed to imghdr
            header = body[:32]
            if six.PY3 and isinstance(body, six.text_type):
                header = header.encode('utf-8', 'ignore')

            detected = imghdr.what(None, header)
            if detected:
                content_type = ContentType('image', detected)
    elif content_type.main == 'audio':
        if body:
            detected = _email.detect_audio_type(body)
            if detected:
                content_type = ContentType('audio', detected)

    return content_type
Ejemplo n.º 2
0
def adjust_content_type(content_type, body=None, filename=None):
    """Narrow down a content type from the filename or the body.

    When the type is the generic ``application/octet-stream`` and a filename
    is available, the internal ``_guess_type`` lookup is consulted first
    (its answer is returned immediately), followed by ``mimetypes``.
    For image and audio types with a non-empty body, the subtype is
    re-detected from the body.  Otherwise the type is returned unchanged.
    """
    if filename and str(content_type) == 'application/octet-stream':
        # internal overrides win outright
        override = _guess_type(filename)
        if override:
            return override

        # no override available, rely on the extension-based guess
        mime_guess = mimetypes.guess_type(filename)[0]
        if mime_guess:
            fallback = ('application', 'octet-stream')
            major, minor = fix_content_type(mime_guess, default=fallback)
            content_type = ContentType(major, minor)

    family = content_type.main
    if family == 'image' and body:
        subtype = imghdr.what(None, body)
        if subtype:
            return ContentType('image', subtype)
    elif family == 'audio' and body:
        subtype = audio._whatsnd(body)
        if subtype:
            return ContentType('audio', subtype)

    return content_type
Ejemplo n.º 3
0
def _guess_type(filename):
    """
    Hard-coded content type lookup for filename suffixes that heuristic
    content type checkers get wrong.

    Returns a ContentType for names ending in ".bz2" or ".gz", and None
    for any other name.
    """
    # (suffix, application subtype) pairs, checked in order
    known_suffixes = (
        (".bz2", "x-bzip2"),
        (".gz", "x-gzip"),
    )

    for suffix, subtype in known_suffixes:
        if filename.endswith(suffix):
            return ContentType("application", subtype)

    return None
Ejemplo n.º 4
0
def _guess_type(filename):
    """
    Internal guesser for the few content types that are pinned by filename
    suffix because heuristic content type checkers get them wrong.

    Yields application/x-bzip2 for '.bz2' names, application/x-gzip for
    '.gz' names and None when the suffix is not one of those.
    """
    if filename.endswith('.bz2'):
        subtype = 'x-bzip2'
    elif filename.endswith('.gz'):
        subtype = 'x-gzip'
    else:
        # not a suffix we have a hard-coded answer for
        return None

    return ContentType('application', subtype)
Ejemplo n.º 5
0
 def charset(self, value):
     """Set the charset on the content type and the Content-Type header.

     The raw value is handed to ``content_type.set_charset``; the header
     parameter receives the lower-cased value.  A ``text/plain``
     Content-Type header is created first when none exists, and the
     headers are flagged as changed.
     NOTE(review): presumably a property setter - the decorator is not
     visible in this excerpt.
     """
     lowered = value.lower()
     self.content_type.set_charset(value)

     headers = self.headers
     if 'Content-Type' not in headers:
         headers['Content-Type'] = ContentType('text', 'plain', {})
     headers['Content-Type'].params['charset'] = lowered
     headers.changed = True
Ejemplo n.º 6
0
def grab_headers(pointer, iterator, parent):
    """Consume content-type tokens up to the first token of another kind
    and wrap the scanned span into a part.

    Used for parts of the message that carry arbitrary headers,
    e.g. text/rfc822-headers. The part takes the first content-type token
    seen as its content type, falling back to text/plain if there was none."""

    first_content_type = None
    while True:
        iterator.check()
        token = iterator.next()

        # anything that is not a content-type token ends the scan
        if not token.is_content_type():
            break

        # keep only the first content-type encountered on the way
        if not first_content_type:
            first_content_type = token

    return make_part(
        content_type=first_content_type or ContentType("text", "plain"),
        start=pointer,
        end=token,
        iterator=iterator,
        parent=parent)
Ejemplo n.º 7
0
def decode_charset(ctype, body):
    """Decode a text body to unicode using the charset declared on ``ctype``.

    Bodies whose content type is not ``text/*`` are returned untouched.
    For text types the body is sniffed with ``magic`` first; if the sniffed
    type is not text, the body is returned undecoded together with the
    sniffed content type.  Otherwise the body is converted to unicode and,
    for utf-8 ``text/html``, raw non-breaking spaces are replaced with the
    named HTML entity for a non-breaking space.

    Returns a ``(body, content_type)`` tuple.
    """
    if ctype.main != 'text':
        return body, ctype

    # sniff the type from the payload itself; when it is not text the body
    # is handed back as is, labelled with the sniffed type
    mime_type = magic.from_buffer(body, mime=True)
    mime_type = ContentType(*mime_type.split("/", 1))
    if mime_type.main != 'text':
        return body, mime_type

    charset = ctype.get_charset()
    body = charsets.convert_to_unicode(charset, body)

    # for text/html unicode bodies make sure to replace the raw
    # non-breaking space (0xA0) with its named HTML entity, Outlook is
    # reported to have a bug there
    if ctype.sub == 'html' and charset == 'utf-8':
        # the ampersand is written as \x26 on purpose, so that tools which
        # decode HTML entities found in source text cannot turn the
        # replacement back into a raw 0xA0 character
        body = body.replace(u'\xa0', u'\x26nbsp;')

    return body, ctype
Ejemplo n.º 8
0
def make_part(content_type,
              start,
              end,
              iterator,
              parts=(),
              enclosed=None,
              parent=None,
              badmime=False):
    """Create a MimePart covering the span between two tokens.

    Nothing is parsed here: the part only remembers the start and end
    positions inside the scanned string.

    :param content_type: content type of the part; replaced with
        application/octet-stream when ``badmime`` is set
    :param start: token the part starts at
    :param end: token the part ends at
    :param iterator: token iterator, provides ``string`` and ``stream``
    :param parts: sub-parts passed through to the MimePart
    :param enclosed: enclosed part passed through to the MimePart
    :param parent: content type of the parent part; None marks the root
    :param badmime: flags the part as badly formed MIME
    :return: the new MimePart
    """

    # here we detect where the message really starts
    # the exact position in the string: right after the end of the
    # starting boundary, or at the start of the token otherwise
    if start.is_boundary():
        start = start.end + 1
    else:
        start = start.start

    # if this is the message ending, end of part
    # the position of the last symbol of the message
    if end.is_end():
        end = len(iterator.string) - 1
    # for multipart boundaries
    # consider the final boundary as the ending one
    elif content_type.is_multipart():
        end = end.end
    # otherwise, end is the position of the symbol before
    # the boundary start
    else:
        end = end.start - 1

    # our tokenizer detected the beginning of the message container
    # that is separated from the enclosed message by newlines
    # here we find where the enclosed message begins by searching for the
    # first newline
    if parent and (parent.is_message_container()
                   or parent.is_headers_container()):
        start = locate_first_newline(iterator.stream, start)

    if badmime:
        content_type = ContentType('application', 'octet-stream')

    # ok, finally, create the MimePart.
    # note that it does not parse anything, just remembers
    # the position in the string
    return MimePart(container=Stream(content_type=content_type,
                                     start=start,
                                     end=end,
                                     stream=iterator.stream,
                                     string=iterator.string),
                    badmime=badmime,
                    parts=parts,
                    enclosed=enclosed,
                    # identity test: only a missing parent marks the root,
                    # regardless of how the parent type implements equality
                    is_root=(parent is None))
Ejemplo n.º 9
0
def adjust_content_type(content_type, body=None, filename=None):
    """Sharpen a content type with help of the filename or the body.

    A generic ``application/octet-stream`` type that comes with a filename
    is re-guessed through ``mimetypes``.  Image and audio types that come
    with a non-empty body get their subtype re-detected from the body.
    If neither step finds anything, the incoming type is returned as is.
    """
    if filename and str(content_type) == 'application/octet-stream':
        mime_name = mimetypes.guess_type(filename)[0]
        if mime_name:
            octet_stream = ('application', 'octet-stream')
            primary, secondary = fix_content_type(mime_name,
                                                  default=octet_stream)
            content_type = ContentType(primary, secondary)

    if content_type.main == 'image':
        # an empty/missing body short-circuits the detection
        subtype = body and imghdr.what(None, body)
        if subtype:
            content_type = ContentType('image', subtype)
    elif content_type.main == 'audio':
        subtype = body and audio._whatsnd(body)
        if subtype:
            content_type = ContentType('audio', subtype)

    return content_type
Ejemplo n.º 10
0
def default_content_type():
    """Build a fresh text/plain content type with an ascii charset."""
    params = {'charset': 'ascii'}
    return ContentType("text", "plain", params)
Ejemplo n.º 11
0
        # and its body.
        ^(\r\n|\n)
    )
    """, re.IGNORECASE | re.MULTILINE | re.VERBOSE)

# names of the tokenizer regex groups, read by tokenize() via m.group(...)
_CTYPE = 'ctype'
_BOUNDARY = 'boundary'
# single module-level End() instance
# NOTE(review): presumably the token marking the end of the message - confirm
_END = End()
# NOTE(review): looks like an upper bound on the number of scanning
# operations - confirm against the code that reads it
_MAX_OPS = 500

# section labels
# NOTE(review): presumably name the part of the message being scanned - confirm
_SECTION_HEADERS = 'headers'
_SECTION_MULTIPART_PREAMBLE = 'multipart-preamble'
_SECTION_MULTIPART_EPILOGUE = 'multipart-epilogue'
_SECTION_BODY = 'body'

# text/plain with a us-ascii charset
# NOTE(review): presumably the fallback for parts without a declared
# content type - confirm
_DEFAULT_CONTENT_TYPE = ContentType('text', 'plain', {'charset': 'us-ascii'})
# a bare CRLF sequence
_EMPTY_LINE = '\r\n'


def tokenize(string):
    """
    Scans the entire message to find all Content-Types and boundaries.
    """
    tokens = deque()
    for m in _RE_TOKENIZER.finditer(string):
        if m.group(_CTYPE):
            name, token = parsing.parse_header(m.group(_CTYPE))
        elif m.group(_BOUNDARY):
            token = Boundary(
                m.group(_BOUNDARY).strip("\t\r\n"),
                _grab_newline(m.start(), string, -1),