def get_unicode(self, key, failobj=None, errors="strict"):
    """Return the value stored under ``key`` as a decoded unicode string.

    The raw value is fetched with ``self.get``.  When nothing is stored
    under ``key`` the ``failobj`` argument is returned untouched.  Otherwise
    each ``(text, charset)`` pair produced by
    ``email.header.decode_header`` is appended to a fresh
    ``email.header.Header`` using the ``errors`` policy, and the header is
    converted with ``unicode()``.
    """
    raw = self.get(key, None)
    if raw is None:
        return failobj

    assembled = email.header.Header()
    pieces = email.header.decode_header(raw)
    for text, charset in pieces:
        assembled.append(text, charset, errors)
    return unicode(assembled)
def header_to_string(headervalue):
    """Decode a (possibly RFC 2047 encoded) header value into a string.

    The value is split with ``email.header.decode_header`` and the parts
    are re-assembled in an ``email.header.Header`` whose ``str()`` is
    returned.

    A ``UnicodeDecodeError`` raised while appending a part is re-raised
    after two diagnostic lines have been printed.
    """
    result = email.header.Header()
    for part in email.header.decode_header(headervalue):
        text, charset = part[0], part[1]
        try:
            result.append(text, charset)
        except UnicodeDecodeError:
            print("error handing header '%s'" % headervalue)
            print("unable to process header %s in encoding %s" % (repr(text), charset))
            raise
    return str(result)
def ParseMessage(fd, pgpmime=True):
    """Parse a message from ``fd`` and attach its raw header lines.

    The header section is read first, line by line, up to and including the
    first empty line (or end of file).  Lines starting with a space or a tab
    are folded into the previous entry.  The file is then rewound to where
    it started and handed to ``PGPMimeParser`` (when ``pgpmime`` is true) or
    to the plain ``email.parser.Parser``.

    The parsed message is returned with the collected lines stored in its
    ``raw_header`` attribute.
    """
    start = fd.tell()

    terminators = ('', '\n', '\r\n')
    raw_lines = [fd.readline()]
    while raw_lines[-1] not in terminators:
        current = fd.readline()
        if current.startswith((' ', '\t')):
            # Continuation line: glue it to the header it belongs to.
            raw_lines[-1] += current
        else:
            raw_lines.append(current)

    fd.seek(start)
    parser = PGPMimeParser() if pgpmime else email.parser.Parser()
    message = parser.parse(fd)
    message.raw_header = raw_lines
    return message
def parse(msgdata):
    """Parse a raw e-mail and return its decoded body and headers.

    Args:
        msgdata: the raw message; parsed with ``email.message_from_bytes``
            when ``PYT3`` is true, with ``email.message_from_string``
            otherwise.

    Returns:
        A ``(payload, headers)`` tuple.  ``payload`` is the
        transfer-decoded body converted to text, or ``None`` when
        ``get_payload(decode=True)`` yields nothing to decode (which is what
        it does for multipart messages).  ``headers`` is an
        ``accounting.utils.cidict`` mapping every header name to the list of
        its decoded parts.
    """
    if PYT3:
        msg = email.message_from_bytes(msgdata)
    else:
        msg = email.message_from_string(msgdata)

    headers = accounting.utils.cidict()
    for name, val in msg._headers:
        header = headers.setdefault(name, [])
        for decoded, charset in email.header.decode_header(val):
            if not PYT3:
                decoded = decoded.decode(charset or 'ascii')
            elif isinstance(decoded, bytes):
                # On Python 3 decode_header() returns bytes for encoded
                # words; convert them so the list holds text only.  This is
                # lenient on purpose: the previous code never raised here.
                try:
                    decoded = decoded.decode(charset or 'ascii', 'replace')
                except LookupError:
                    # Unknown charset label in the encoded word.
                    decoded = decoded.decode('ascii', 'replace')
            header.append(decoded)

    payload = msg.get_payload(decode=True)
    if payload is not None:
        body_charset = msg.get_content_charset()
        if body_charset:
            payload = payload.decode(body_charset)
        else:
            # No charset parameter: decode(None) would raise TypeError.
            # us-ascii is the MIME default for text bodies.
            payload = payload.decode('ascii', 'replace')
    return payload, headers
def usersAsHeader(users, header_name):
    """Build an ``email.header.Header`` listing ``users`` as addresses.

    For every user the full name is appended first (as ``us-ascii`` when
    ``isascii`` accepts it, as ``utf-8`` otherwise), followed by the e-mail
    address in angle brackets.  Every address except the last one is
    followed by a comma.
    """
    result = email.header.Header(header_name=header_name)
    for position, user in enumerate(users):
        name_charset = "us-ascii" if isascii(user.fullname) else "utf-8"
        result.append(user.fullname, name_charset)

        address_format = "<%s>," if position < len(users) - 1 else "<%s>"
        result.append(address_format % user.email, "us-ascii")
    return result
def ParseMessage(fd, pgpmime=True):
    """Parse a message from ``fd`` and attach its raw header lines.

    When debug logging is enabled the total length of ``fd`` is measured
    (by seeking to its end and back) and logged together with the current
    position.

    The header section is then read line by line up to and including the
    first empty line (or end of file); lines starting with a space or a tab
    are folded into the previous entry.  Finally the file is rewound and
    parsed with ``PGPMimeParser`` when both ``GnuPG`` and ``pgpmime`` are
    truthy, with ``email.parser.Parser`` otherwise.

    The parsed message is returned with the collected lines stored in its
    ``raw_header`` attribute.
    """
    start = fd.tell()

    if logger.isEnabledFor(logging.DEBUG):
        fd.seek(0, os.SEEK_END)
        total = fd.tell()
        fd.seek(start)
        logger.debug("Parsing message (fd: %s, len: %d, pos: %d)" % (fd, total, start))

    terminators = ('', '\n', '\r\n')
    raw_lines = [fd.readline()]
    while raw_lines[-1] not in terminators:
        current = fd.readline()
        if current.startswith((' ', '\t')):
            # Continuation line: glue it to the header it belongs to.
            raw_lines[-1] += current
        else:
            raw_lines.append(current)

    fd.seek(start)
    parser = PGPMimeParser() if (GnuPG and pgpmime) else email.parser.Parser()
    message = parser.parse(fd)
    message.raw_header = raw_lines
    return message
def format_addresses(addresses, header_name=None, charset='iso-8859-1'):
    """Build an address header from a list of ``(name, address)`` pairs.

    This extends ``email.utils.formataddr`` to several addresses and to
    names that are not plain us-ascii.  ``addresses`` looks like
    ``[('name', 'name@domain'), ...]``; ``header_name`` is passed to
    ``Header`` so its length can be used to limit the first line.

    ``Header()`` knows nothing about the syntax of address fields and
    ``formataddr()`` knows nothing about encoding non us-ascii characters,
    so both are combined:

    - a name that is pure us-ascii is formatted together with its address
      by ``formataddr()`` and appended as ``us-ascii``;
    - any other name is appended on its own, so ``Header`` encodes it with
      the default ``charset`` (a byte string name must already be encoded
      in ``charset``), and the address follows in angle brackets as
      ``us-ascii``.

    The address part is expected to be pure us-ascii.  The resulting
    ``email.header.Header`` is returned.
    """
    result = email.header.Header(charset=charset, header_name=header_name)

    for position, (name, addr) in enumerate(addresses):
        if position:
            # separator between two addresses
            result.append(',', charset='us-ascii')

        # Find out whether the name is pure us-ascii, whatever its type.
        try:
            if isinstance(name, str):
                name.encode('us-ascii')
            else:
                name = name.decode('us-ascii')
        except UnicodeError:
            pure_ascii = False
        else:
            pure_ascii = True

        if pure_ascii:
            # formataddr can be trusted here; force us-ascii rather than
            # the default charset
            result.append(email.utils.formataddr((name, addr)), charset='us-ascii')
        else:
            # let Header encode the name with the default charset ...
            result.append(name)
            # ... but the address itself must stay us-ascii
            result.append('<%s>' % (addr,), charset='us-ascii')

    return result
def get_mail_header_by_data(maildata):
    """Return the header section of a raw mail, with carriage returns removed.

    The header section is everything up to the first empty line.  The
    result always ends with one extra ``'\\n'``: a header block closed by an
    empty line comes back as ``"...\\n\\n"``, and data without an empty line
    comes back whole with ``'\\n'`` appended.

    Args:
        maildata: the raw mail as a text string.

    Returns:
        The header text as a string.
    """
    # Dropping every CR first makes "blank line" simply mean two
    # consecutive LFs, whatever mix of line endings the mail used.
    text = maildata.replace('\r', '')
    blank_line = text.find('\n\n')
    if blank_line == -1:
        return text + '\n'
    # Keep the LF ending the last header line, then add the closing LF.
    return text[:blank_line + 1] + '\n'
def decode_header(self, s):
    """Decode an RFC 2047 encoded header value into one unicode string.

    A falsy ``s`` (``None``, empty string) is returned unchanged.
    Otherwise the value is split with ``email.header.decode_header``; parts
    without a charset go through ``to_unicode``, parts with a charset are
    decoded with it (undecodable bytes are replaced).  The decoded parts are
    joined with single spaces.
    """
    if not s:
        return s

    parts = []
    for text, charset in email.header.decode_header(s):
        if not charset:
            parts.append(to_unicode(text))
            continue
        try:
            parts.append(unicode(text, charset, 'replace'))
        except Exception:
            # Best effort (e.g. unknown charset name): fall back to the
            # generic conversion.  Exception rather than a bare except so
            # that KeyboardInterrupt / SystemExit are not swallowed.
            parts.append(to_unicode(text))
    return u' '.join(parts)
def decode_any_header(value):
    '''Wrapper around email.header.decode_header to absorb all errors.

    Folded lines in ``value`` are unfolded first (a CR or LF plus any
    following whitespace becomes one space).  If the value cannot be parsed
    it is kept as a single chunk without charset.

    Each chunk is appended to a new ``email.header.Header``: strictly with
    its own charset first, then as ``latin1`` if the bytes do not decode,
    and finally with its own charset and ``errors='replace'`` if anything
    else goes wrong.

    Returns the ``email.header.Header`` built that way.
    '''
    value = re.sub(r'[\r\n]\s*', ' ', value)
    try:
        chunks = email.header.decode_header(value)
    except email.errors.HeaderParseError:
        chunks = [(value, None)]

    header = email.header.Header()
    for string, charset in chunks:
        if charset is not None and not isinstance(charset, email.header.Charset):
            charset = email.header.Charset(charset)
        try:
            try:
                header.append(string, charset, errors='strict')
            except UnicodeDecodeError:
                # Mislabelled chunk: latin1 can decode any byte sequence.
                header.append(string, 'latin1', errors='strict')
        except Exception:
            # Last resort.  Exception rather than a bare except so that
            # KeyboardInterrupt / SystemExit still propagate.
            header.append(string, charset, errors='replace')
    return header
def send(self, recipients=None, **kwargs):
    """
    Sends the email.

    ``recipients``
        List of email addresses of recipients. Nothing is sent when it is
        ``None`` or empty.

    ``**kwargs``
        Extra values handed to ``FakeC``, which is passed to the mail
        template as ``c``.
    """
    # ``recipients is []`` compared against a brand-new list and was never
    # true, so an empty list used to fall through; test emptiness instead.
    if not recipients:
        return

    log.debug('Getting mail template: %s' % self.template)

    to = ', '.join(recipients)
    sender = '%s <%s>' % (pylons.config['debexpo.sitename'], pylons.config['debexpo.email'])

    c = FakeC(to=to, sender=sender, config=pylons.config, **kwargs)

    template_file = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'templates/email/%s.mako' % self.template)
    lookup = TemplateLookup(directories=[os.path.dirname(template_file)])
    template = Template(filename=template_file, lookup=lookup, module_directory=pylons.config['app_conf']['cache_dir'])

    # Temporarily set up routes.util.url_for as the URL renderer used for h.url() in templates
    pylons.url._push_object(routes.util.url_for)
    try:
        rendered_message = template.render_unicode(_=gettext, h=h, c=c).encode("utf-8")

        try:
            # Parse the email message
            message = email.message_from_string(rendered_message)
        except email.errors.MessageParseError:
            # Parsing the message failed, let's send the raw data...
            # It is already UTF-8 encoded; encoding it a second time would
            # only add an implicit ASCII decode that fails on non-ASCII text.
            message = rendered_message
        else:
            # By default, python base64-encodes all UTF-8 text which is annoying. Force quoted-printable
            email.charset.add_charset('utf-8', email.charset.QP, email.charset.QP, 'utf-8')

            # Create a new, MIME-aware message
            new_message = email.mime.text.MIMEText(message.get_payload().decode("utf-8"), "plain", "utf-8")

            for key in message.keys():
                try:
                    contents = message[key].decode("utf-8").split(u" ")
                except UnicodeDecodeError:
                    # Bad encoding in the header, don't try to do anything more...
                    header = message[key]
                else:
                    # Do some RFC2047-encoding of the headers. We split on word-boundaries so that
                    # python doesn't encode the whole header in a RFC2047 blob, but only what's
                    # needed.
                    header = email.header.Header()
                    for word in contents:
                        header.append(word)

                new_message[key] = header

            # And get that back as a string to pass onto sendmail
            message = new_message.as_string()
    finally:
        # Undo the push even when rendering or message building fails.
        pylons.url._pop_object()

    log.debug('Starting SMTP session to %s' % self.server)
    session = smtplib.SMTP(self.server)
    try:
        if self.auth:
            log.debug('Authentication requested; logging in')
            session.login(self.auth['user'], self.auth['password'])

        log.debug('Sending email to %s' % ', '.join(recipients))
        result = session.sendmail(pylons.config['debexpo.email'], recipients, message)
    finally:
        # Always release the connection; fall back to a plain close when
        # the server is already gone and QUIT cannot be sent.
        try:
            session.quit()
        except smtplib.SMTPException:
            session.close()

    if result:
        # Something went wrong.
        for recipient in result.keys():
            log.critical('Failed sending to %s: %s, %s' % (recipient, result[recipient][0], result[recipient][1]))
    else:
        log.debug('Successfully sent')
def format_addresses(addresses, header_name=None, charset=None):
    """
    Build a MIME-compliant header for a From, To, Cc, or any other
    I{address} related field, combining email.utils.formataddr() and
    email.header.Header().

    @type addresses: list
    @param addresses: the addresses, each one either a plain string or a
        C{(name, address)} tuple, e.g.
        C{[ 'address@domain', (u'Name', 'name@domain'), ...]}.
        A I{name} containing non us-ascii characters must be a unicode
        string or a byte string encoded with I{charset}.
    @type header_name: string or None
    @keyword header_name: name of the header; it is passed to C{Header},
        which uses its length to limit the first line of the value.
    @type charset: str
    @keyword charset: default charset of the C{Header}. It is used to decode
        byte string names that are not pure us-ascii and is a B{hint} for
        encoding unicode names (I{utf-8} is the fall back). With
        B{Python 3.x} it is no longer a hint: an exception is raised instead
        of falling back to I{utf-8}.

    @rtype: email.header.Header
    @return: the header holding the comma separated addresses; call
        C{encode()} on it to get the encoded header value.

    >>> print format_addresses([('John', 'john@example.org')], 'From', 'us-ascii').encode()
    John <john@example.org>

    >>> print format_addresses([(u'l\\xe9o', 'leo@example.org')], 'To', 'iso-8859-1').encode()
    =?iso-8859-1?q?l=E9o?= <leo@example.org>

    >>> print format_addresses(['jane@example.org', ('John', 'john@example.org')], 'From', 'us-ascii').encode()
    jane@example.org , John <john@example.org>
    """
    result = email.header.Header(charset=charset, header_name=header_name)

    for position, entry in enumerate(addresses):
        if position > 0:
            # separator between two addresses
            result.append(',', charset='us-ascii')

        try:
            display_name, mailbox = entry
        except ValueError:
            # not a (name, address) pair: a bare e-mail address
            result.append(entry, charset='us-ascii')
            continue

        if not utils.is_usascii(display_name):
            # Header RFC2047-encodes the name (a byte string is decoded
            # with the default charset first) ...
            result.append(display_name)
            # ... while the address itself must stay us-ascii
            result.append('<%s>' % (mailbox,), charset='us-ascii')
            continue

        # pure us-ascii name: formataddr can be used, and us-ascii must be
        # forced instead of the default charset
        result.append(email.utils.formataddr((display_name, mailbox)), charset='us-ascii')

    return result
def send(self, recipients=None, **kwargs):
    """
    Sends the email.

    ``recipients``
        List of email addresses of recipients. Nothing is sent when it is
        ``None`` or empty.

    ``**kwargs``
        Extra values handed to ``FakeC``, which is passed to the mail
        template as ``c``.
    """
    # ``recipients is []`` compared against a brand-new list and was never
    # true, so an empty list used to fall through; test emptiness instead.
    if not recipients:
        return

    log.debug('Getting mail template: %s' % self.template)

    to = ', '.join(recipients)
    sender = '%s <%s>' % (pylons.config['debexpo.sitename'], pylons.config['debexpo.email'])

    c = FakeC(to=to, sender=sender, config=pylons.config, **kwargs)

    template_file = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'templates/email/%s.mako' % self.template)
    lookup = TemplateLookup(directories=[os.path.dirname(template_file)])
    template = Template(filename=template_file, lookup=lookup, module_directory=pylons.config['app_conf']['cache_dir'])

    # Temporarily set up routes.util.url_for as the URL renderer used for h.url() in templates
    pylons.url._push_object(routes.util.url_for)
    try:
        rendered_message = template.render_unicode(_=gettext, h=h, c=c).encode("utf-8")

        try:
            # Parse the email message
            message = email.message_from_string(rendered_message)
        except email.errors.MessageParseError:
            # Parsing the message failed, let's send the raw data...
            # It is already UTF-8 encoded; encoding it a second time would
            # only add an implicit ASCII decode that fails on non-ASCII text.
            message = rendered_message
        else:
            # By default, python base64-encodes all UTF-8 text which is annoying. Force quoted-printable
            email.charset.add_charset('utf-8', email.charset.QP, email.charset.QP, 'utf-8')

            # Create a new, MIME-aware message
            new_message = email.mime.text.MIMEText(message.get_payload().decode("utf-8"), "plain", "utf-8")

            for key in message.keys():
                try:
                    contents = message[key].decode("utf-8").split(u" ")
                except UnicodeDecodeError:
                    # Bad encoding in the header, don't try to do anything more...
                    header = message[key]
                else:
                    # Do some RFC2047-encoding of the headers. We split on word-boundaries so that
                    # python doesn't encode the whole header in a RFC2047 blob, but only what's
                    # needed.
                    header = email.header.Header()
                    for word in contents:
                        header.append(word)

                new_message[key] = header

            # And get that back as a string to pass onto sendmail
            message = new_message.as_string()
    finally:
        # Undo the push even when rendering or message building fails.
        pylons.url._pop_object()

    log.debug('Starting SMTP session to %s:%s' % (self.server, self.port))
    session = smtplib.SMTP(self.server, self.port)
    try:
        if self.auth:
            log.debug('Authentication requested; logging in')
            session.login(self.auth['user'], self.auth['password'])

        log.debug('Sending email to %s' % ', '.join(recipients))
        result = session.sendmail(pylons.config['debexpo.email'], recipients, message)
    finally:
        # Always release the connection; fall back to a plain close when
        # the server is already gone and QUIT cannot be sent.
        try:
            session.quit()
        except smtplib.SMTPException:
            session.close()

    if result:
        # Something went wrong.
        for recipient in result.keys():
            log.critical('Failed sending to %s: %s, %s' % (recipient, result[recipient][0], result[recipient][1]))
    else:
        log.debug('Successfully sent')
def usersAsHeader(users, header_name):
    """Build an ``email.header.Header`` listing ``users`` as addresses.

    For every user the full name is appended first (as ``us-ascii`` when
    ``isascii`` accepts it, as ``utf-8`` otherwise), followed by an address
    in angle brackets.  The address is the user's own e-mail, except for a
    ``From`` header while
    ``configuration.smtp.USE_SYSTEM_MAIL_FOR_FROM_FIELD`` is enabled, where
    ``configuration.base.SYSTEM_USER_EMAIL`` is used instead.

    Addresses are separated by commas; the last one gets no trailing comma
    (the system-address case used to emit one, leaving the header ending in
    ``","``).

    Args:
        users: sequence of objects with ``fullname`` and ``email``
            attributes.
        header_name: name of the header being built.

    Returns:
        The populated ``email.header.Header``.
    """
    header = email.header.Header(header_name=header_name)

    # Loop-invariant.  The flag is compared with ``== True`` exactly as
    # before, so only True/1 enables it, not any truthy value.
    use_system_address = (
        header_name == "From"
        and configuration.smtp.USE_SYSTEM_MAIL_FOR_FROM_FIELD == True
    )
    last_index = len(users) - 1

    for index, user in enumerate(users):
        name_charset = "us-ascii" if isascii(user.fullname) else "utf-8"
        header.append(user.fullname, name_charset)

        if use_system_address:
            address = configuration.base.SYSTEM_USER_EMAIL
        else:
            address = user.email
        separator = "," if index < last_index else ""
        header.append("<%s>%s" % (address, separator), "us-ascii")

    return header
def __parse(self, binary):
    """Parse one MIME entity (header block plus body) given as bytes.

    The header block is split from the body at the first empty line,
    continuation lines are unfolded, and the Content-Type (with its charset
    and boundary parameters), Content-Transfer-Encoding and Subject headers
    are extracted with the regexes stored on ``self``.

    Results are accumulated on ``self.__mail``:
      * a Subject, once decoded by ``__decode_partially_encoded``, is
        appended to ``self.__mail.Subject``;
      * a ``text/plain`` body is decoded and appended to
        ``self.__mail.texts``;
      * a ``text/html`` body is reduced to its visible text (script and
        style elements removed) and appended to ``self.__mail.texts``;
      * a ``multipart/*`` body is cut at its boundary lines and every part
        is parsed recursively.
    Bodies of any other content type are ignored.

    Raises:
        RuntimeError: when the end of the header block cannot be found, or
            when a multipart body lacks its closing or its opening boundary.
    """
    # Remove all CR characters.
    binary = binary.replace(b'\r', b'')
    # print(binary)
    # Split into header and body.
    # NOTE(review): binary[0] raises IndexError when binary is empty - confirm
    # callers never pass an empty part.
    if binary[0] != 0x0a:
        hofs = binary.find(b'\n\n')
        if hofs == -1:
            raise RuntimeError('ヘッダの終端が見つからない。')
        header_binary = binary[:hofs]  # The trailing LF is excluded.
        body_binary = binary[hofs + 2:]
    else:
        # The data starts with an empty line: there is no header at all.
        header_binary = b''
        body_binary = binary[1:]
    # Split the header into lines; continuation lines are handled too.
    header = []
    for line in header_binary.split(b'\n'):
        if len(line) == 0:
            continue
        first_byte = line[0]
        if first_byte == 32 or first_byte == 9:
            # Continuation line (starts with space or tab): skip its leading
            # whitespace and join the rest to the previous header line.
            for i in range(len(line)):
                if line[i] not in (32, 9):
                    break
            header[len(header) - 1] += b' ' + line[i:]
        else:
            header.append(line)
    subject = None
    content_type = None
    charset = None
    boundary = None
    content_transfer_encoding = None
    for hdr in header:
        m = re.match(self.HEADER_CONTENT_TYPE_REGEX, hdr)
        if m:
            content_type = m.group(1).decode('utf-8', 'replace').lower()
            #print('content_type=', content_type)
            m = re.search(self.HEADER_CONTENT_TYPE_CHARSET_REGEX, hdr)
            if m:
                # The value is taken from group 2, or from group 3 when
                # group 2 is empty.
                charset = m.group(2)
                if not charset:
                    charset = m.group(3)
                charset = charset.decode('utf-8', 'replace').lower()
                #print('charset=', charset)
            m = re.search(self.HEADER_CONTENT_TYPE_BOUNDARY_REGEX, hdr)
            if m:
                # Same two-group scheme; the boundary is kept as bytes.
                boundary = m.group(2)
                if not boundary:
                    boundary = m.group(3)
                #print('boundary=', boundary)
        m = re.match(self.HEADER_CONTENT_TRANSFER_ENCODING_REGEX, hdr)
        if m:
            content_transfer_encoding = m.group(1).decode(
                'utf-8', 'replace').lower()
            #print('content_transfer_encoding=', content_transfer_encoding)
        m = re.match(self.HEADER_SUBJECT, hdr)
        if m:
            subject = m.group(1)
    if subject:
        subject = self.__decode_partially_encoded(subject)
        self.__mail.Subject.append(subject)
    # Undo the transfer encoding before looking at the content itself.
    if content_transfer_encoding == 'quoted-printable':
        body_binary = self.__decode_quoted_printable(body_binary)
    elif content_transfer_encoding == 'base64':
        body_binary = self.__decode_base64(body_binary)
    if content_type == 'text/plain':
        if charset:
            text = body_binary.decode(charset, 'replace')
        else:
            text = body_binary.decode('ascii', 'replace')
        self.__mail.texts.append(text)
        return
    if content_type == 'text/html':
        soup = BeautifulSoup(body_binary, 'lxml')
        # Drop script and style elements so only visible text remains.
        for s in soup(['script', 'style']):
            s.decompose()
        self.__mail.texts.append(' '.join(soup.stripped_strings))
        return
    if isinstance(content_type, str) and content_type.startswith('multipart/'):
        # NOTE(review): boundary is still None when the Content-Type header
        # carried no boundary parameter; the concatenation below would then
        # raise TypeError - confirm whether that case needs handling.
        bry = b'\n--' + boundary + b'--\n'
        endpos = body_binary.find(bry)
        if endpos == -1:
            raise RuntimeError('multipart terminator not found.')
        endpos += 1  # Account for the leading \n.
        bry = b'--' + boundary + b'\n'
        # Find the first boundary that sits at the start of a line.
        begpos = 0
        while begpos >= 0:
            begpos = body_binary.find(bry, begpos, endpos)
            if begpos == -1:
                raise RuntimeError('multipart not begin.')
            if begpos == 0:
                break
            if body_binary[begpos - 1] == 0x0a:
                break
            begpos += 1
        begpos += len(bry)
        # Everything between the first boundary and the terminator, cut at
        # each boundary line, is one part per piece.
        body_binaries = body_binary[begpos:endpos].split(bry)
        for body_bin in body_binaries:
            self.__parse(body_bin)
def format_addresses(addresses, header_name=None, charset=None):
    """
    Build a MIME-compliant header for a From, To, Cc, or any other
    I{address} related field, combining email.utils.formataddr() and
    email.header.Header().

    @type addresses: list
    @param addresses: the addresses, each one either a plain string or a
        C{(name, address)} tuple, e.g.
        C{[ 'address@domain', ('Name', 'name@domain'), ...]}.
        A I{name} containing non us-ascii characters must be a unicode
        string or a byte string encoded with I{charset}.
    @type header_name: string or None
    @keyword header_name: name of the header; it is passed to C{Header},
        which uses its length to limit the first line of the value.
    @type charset: str
    @keyword charset: default charset of the C{Header}. It is used to decode
        byte string names that are not pure us-ascii and is a B{hint} for
        encoding unicode names (I{utf-8} is the fall back). With
        B{Python 3.x} it is no longer a hint: an exception is raised instead
        of falling back to I{utf-8}.

    @rtype: email.header.Header
    @return: the header holding the comma separated addresses; call
        C{encode()} on it to get the encoded header value.

    >>> print(format_addresses([('John', 'john@example.org')], 'From', 'us-ascii').encode())
    John <john@example.org>

    >>> print(format_addresses([('l\\xe9o', 'leo@example.org')], 'To', 'iso-8859-1').encode())
    =?iso-8859-1?q?l=E9o?= <leo@example.org>

    >>> print(format_addresses(['jane@example.org', ('John', 'john@example.org')], 'From', 'us-ascii').encode())
    jane@example.org , John <john@example.org>
    """
    result = email.header.Header(charset=charset, header_name=header_name)

    for position, entry in enumerate(addresses):
        if position > 0:
            # separator between two addresses
            result.append(',', charset='us-ascii')

        try:
            display_name, mailbox = entry
        except ValueError:
            # not a (name, address) pair: a bare e-mail address
            result.append(entry, charset='us-ascii')
            continue

        if not utils.is_usascii(display_name):
            # Header RFC2047-encodes the name (a byte string is decoded
            # with the default charset first) ...
            result.append(display_name)
            # ... while the address itself must stay us-ascii
            result.append('<%s>' % (mailbox,), charset='us-ascii')
            continue

        # pure us-ascii name: formataddr can be used, and us-ascii must be
        # forced instead of the default charset
        result.append(email.utils.formataddr((display_name, mailbox)), charset='us-ascii')

    return result
def usersAsHeader(users, header_name):
    """Build an ``email.header.Header`` listing ``users`` as addresses.

    For every user the full name is appended first (as ``us-ascii`` when
    ``isascii`` accepts it, as ``utf-8`` otherwise), followed by an address
    in angle brackets.  The address is the user's own e-mail, except for a
    ``From`` header while
    ``configuration.smtp.USE_SYSTEM_MAIL_FOR_FROM_FIELD`` is enabled, where
    ``configuration.base.SYSTEM_USER_EMAIL`` is used instead.

    Addresses are separated by commas; the last one gets no trailing comma
    (the system-address case used to emit one, leaving the header ending in
    ``","``).

    Args:
        users: sequence of objects with ``fullname`` and ``email``
            attributes.
        header_name: name of the header being built.

    Returns:
        The populated ``email.header.Header``.
    """
    header = email.header.Header(header_name=header_name)

    # Loop-invariant.  The flag is compared with ``== True`` exactly as
    # before, so only True/1 enables it, not any truthy value.
    use_system_address = (
        header_name == "From"
        and configuration.smtp.USE_SYSTEM_MAIL_FOR_FROM_FIELD == True
    )
    last_index = len(users) - 1

    for index, user in enumerate(users):
        name_charset = "us-ascii" if isascii(user.fullname) else "utf-8"
        header.append(user.fullname, name_charset)

        if use_system_address:
            address = configuration.base.SYSTEM_USER_EMAIL
        else:
            address = user.email
        separator = "," if index < last_index else ""
        header.append("<%s>%s" % (address, separator), "us-ascii")

    return header