def adjust_content_type(content_type, body=None, filename=None):
    """Refine a content type using the filename and the payload.

    When the incoming type is the generic ``application/octet-stream`` and a
    filename is available, the internal ``_guess_type`` table is asked first
    and its answer is returned as-is; otherwise ``mimetypes`` is consulted.
    Image and audio types are then narrowed by sniffing the body.

    :param content_type: the declared content type of the part
    :param body: optional payload used for image/audio sniffing
    :param filename: optional attachment filename
    :return: the adjusted (or unchanged) content type
    """
    if filename and str(content_type) == 'application/octet-stream':
        # the hard-coded overrides win over the stdlib tables
        internal_guess = _guess_type(filename)
        if internal_guess:
            return internal_guess

        # nothing hard-coded for this name, fall back to mimetypes
        stdlib_guess = mimetypes.guess_type(filename)[0]
        if stdlib_guess:
            major, minor = fix_content_type(
                stdlib_guess, default=('application', 'octet-stream'))
            content_type = ContentType(major, minor)

    family = content_type.main
    if not body:
        # nothing to sniff
        return content_type

    if family == 'image':
        # only the first bytes are handed to imghdr
        header = body[:32]
        if six.PY3 and isinstance(body, six.text_type):
            header = header.encode('utf-8', 'ignore')
        detected = imghdr.what(None, header)
        if detected:
            return ContentType('image', detected)
    elif family == 'audio':
        detected = _email.detect_audio_type(body)
        if detected:
            return ContentType('audio', detected)

    return content_type
def adjust_content_type(content_type, body=None, filename=None):
    """Adjust a content type based on the filename or the body contents.

    A generic ``application/octet-stream`` type is resolved from the filename
    (internal ``_guess_type`` first, then ``mimetypes``). For image and audio
    types with a non-empty body, the subtype is replaced by whatever the
    corresponding sniffer detects.

    :param content_type: the declared content type of the part
    :param body: optional payload used for image/audio sniffing
    :param filename: optional attachment filename
    :return: the adjusted (or unchanged) content type
    """
    if filename and str(content_type) == 'application/octet-stream':
        # an internal match short-circuits everything else
        override = _guess_type(filename)
        if override:
            return override

        # no internal match: let mimetypes have a go
        by_extension = mimetypes.guess_type(filename)[0]
        if by_extension:
            fallback = ('application', 'octet-stream')
            main, sub = fix_content_type(by_extension, default=fallback)
            content_type = ContentType(main, sub)

    kind = content_type.main
    if body:
        if kind == 'image':
            subtype = imghdr.what(None, body)
            if subtype:
                content_type = ContentType('image', subtype)
        elif kind == 'audio':
            subtype = audio._whatsnd(body)
            if subtype:
                content_type = ContentType('audio', subtype)

    return content_type
def _guess_type(filename):
    """
    Internal content type guesser.

    Hard codes a few tricky content types that the heuristic content
    type checker gets wrong. Returns None when the filename matches none
    of the known suffixes.
    """
    # (filename suffix, application subtype), checked in order
    known_suffixes = (
        (".bz2", "x-bzip2"),
        (".gz", "x-gzip"),
    )
    for suffix, subtype in known_suffixes:
        if filename.endswith(suffix):
            return ContentType("application", subtype)
    return None
def _guess_type(filename):
    """
    Internal content type guesser.

    Covers compressed-archive suffixes that the heuristic content type
    checker gets wrong; any other filename yields None.
    """
    if filename.endswith('.bz2'):
        subtype = 'x-bzip2'
    elif filename.endswith('.gz'):
        subtype = 'x-gzip'
    else:
        # not one of the hard-coded cases
        return None
    return ContentType('application', subtype)
def charset(self, value):
    """Set the charset on the content type and on the Content-Type header.

    The value is handed unchanged to ``content_type.set_charset``; the
    lower-cased form is stored in the header's ``charset`` parameter. A
    ``text/plain`` Content-Type header is created when none exists yet, and
    the headers are flagged as changed.
    """
    normalized = value.lower()
    self.content_type.set_charset(value)

    headers = self.headers
    if 'Content-Type' not in headers:
        # no header yet: start from a bare text/plain
        headers['Content-Type'] = ContentType('text', 'plain', {})
    headers['Content-Type'].params['charset'] = normalized
    headers.changed = True
def grab_headers(pointer, iterator, parent):
    """Collect all tokens up to the boundary or the end of the message.

    Used to scan parts of the message that contain random headers,
    e.g. text/rfc822-headers. The first content-type token seen becomes the
    content type of the resulting part; ``text/plain`` is used when none
    was seen.
    """
    first_ctype = None
    while True:
        iterator.check()
        token = iterator.next()

        # only the first content-type met on the way is remembered
        if not first_ctype and token.is_content_type():
            first_ctype = token

        if token.is_content_type():
            continue
        # anything that is not a content-type ends the scan
        break

    part_type = first_ctype or ContentType("text", "plain")
    return make_part(
        content_type=part_type,
        start=pointer,
        end=token,
        iterator=iterator,
        parent=parent)
def decode_charset(ctype, body):
    """Convert a text body to unicode, returning ``(body, content_type)``.

    Non-text parts are returned untouched. For text parts the body is
    sniffed with ``magic``; when the sniffed type is not text, the body is
    returned unconverted together with the sniffed type. Otherwise the body
    is converted using the charset taken from ``ctype``.
    """
    if ctype.main != 'text':
        return body, ctype

    sniffed = magic.from_buffer(body, mime=True)
    sniffed_type = ContentType(*sniffed.split("/", 1))
    if sniffed_type.main != 'text':
        # declared as text but the content says otherwise
        return body, sniffed_type

    charset = ctype.get_charset()
    text = charsets.convert_to_unicode(charset, body)

    # utf-8 text/html bodies get every 0xA0 character replaced; the original
    # note attributes this to a reported Outlook bug.
    # NOTE(review): the original comment looks truncated - confirm the
    # intended replacement string.
    if ctype.sub == 'html' and charset == 'utf-8':
        text = text.replace(u'\xa0', u' ')

    return text, ctype
def make_part(content_type, start, end, iterator, parts=(), enclosed=None,
              parent=None, badmime=False):
    """Create a MimePart covering the span between two tokens.

    Nothing is parsed here; the part only remembers its position in the
    scanned string.

    :param content_type: content type token of the part
    :param start: token the part starts at (a boundary or another token)
    :param end: token the part ends at (a boundary or the end marker)
    :param iterator: token iterator exposing ``string`` and ``stream``
    :param parts: sub-parts passed through to the MimePart
    :param enclosed: enclosed message passed through to the MimePart
    :param parent: token of the enclosing part; ``None`` marks the root part
    :param badmime: when true the part is created as
        ``application/octet-stream`` and flagged as bad MIME
    :return: the new MimePart
    """
    # here we detect where the message really starts:
    # the exact position in the string, at the end of the
    # starting boundary and after the beginning of the end boundary
    if start.is_boundary():
        start = start.end + 1
    else:
        start = start.start

    # if this is the message ending, the end of the part is
    # the position of the last symbol of the message
    if end.is_end():
        end = len(iterator.string) - 1
    # for multipart boundaries
    # consider the final boundary as the ending one
    elif content_type.is_multipart():
        end = end.end
    # otherwise, end is the position of the symbol before
    # the boundary start
    else:
        end = end.start - 1

    # our tokenizer detected the beginning of the message container
    # that is separated from the enclosed message by newlines;
    # here we find where the enclosed message begins by searching for the
    # first newline
    if parent and (parent.is_message_container() or
                   parent.is_headers_container()):
        start = locate_first_newline(iterator.stream, start)

    if badmime:
        content_type = ContentType('application', 'octet-stream')

    # ok, finally, create the MimePart.
    # note that it does not parse anything, just remembers
    # the position in the string
    container = Stream(
        content_type=content_type,
        start=start,
        end=end,
        stream=iterator.stream,
        string=iterator.string)
    return MimePart(
        container=container,
        badmime=badmime,
        parts=parts,
        enclosed=enclosed,
        # identity check: do not route the None test through a custom __eq__
        is_root=parent is None)
def adjust_content_type(content_type, body=None, filename=None):
    """Adjust a content type based on the filename or the body contents.

    A generic ``application/octet-stream`` type is resolved from the filename
    through ``mimetypes``. For image and audio types with a non-empty body,
    the subtype is replaced by whatever the corresponding sniffer detects.

    :param content_type: the declared content type of the part
    :param body: optional payload used for image/audio sniffing
    :param filename: optional attachment filename
    :return: the adjusted (or unchanged) content type
    """
    if filename and str(content_type) == 'application/octet-stream':
        from_name = mimetypes.guess_type(filename)[0]
        if from_name:
            major, minor = fix_content_type(
                from_name, default=('application', 'octet-stream'))
            content_type = ContentType(major, minor)

    top_level = content_type.main
    if not body:
        # without a payload there is nothing to sniff
        return content_type

    if top_level == 'image':
        found = imghdr.what(None, body)
        return ContentType('image', found) if found else content_type

    if top_level == 'audio':
        found = audio._whatsnd(body)
        return ContentType('audio', found) if found else content_type

    return content_type
def default_content_type():
    """Return a new ``text/plain`` ContentType with charset ``ascii``."""
    # a fresh params dict is built on every call
    params = {'charset': 'ascii'}
    return ContentType("text", "plain", params)
# and its body. ^(\r\n|\n) ) """, re.IGNORECASE | re.MULTILINE | re.VERBOSE) _CTYPE = 'ctype' _BOUNDARY = 'boundary' _END = End() _MAX_OPS = 500 _SECTION_HEADERS = 'headers' _SECTION_MULTIPART_PREAMBLE = 'multipart-preamble' _SECTION_MULTIPART_EPILOGUE = 'multipart-epilogue' _SECTION_BODY = 'body' _DEFAULT_CONTENT_TYPE = ContentType('text', 'plain', {'charset': 'us-ascii'}) _EMPTY_LINE = '\r\n' def tokenize(string): """ Scans the entire message to find all Content-Types and boundaries. """ tokens = deque() for m in _RE_TOKENIZER.finditer(string): if m.group(_CTYPE): name, token = parsing.parse_header(m.group(_CTYPE)) elif m.group(_BOUNDARY): token = Boundary( m.group(_BOUNDARY).strip("\t\r\n"), _grab_newline(m.start(), string, -1),