def _build_body_strings(self):
    """Populate ``self.body_plain`` and ``self.body_html`` once.

    Every text/plain and text/html sub-part of ``self.raw`` is passed
    through ``utf8_encode_message_part``.  Results that
    ``is_encoding_error`` flags are stored in ``self.encoding_error``
    instead of being appended to the body.  The work is skipped when
    ``self.has_built_body_strings`` is already true.
    """
    if self.has_built_body_strings:
        return

    def _decode(section):
        # The part's own charset wins; self.charset is used when
        # message_part_charset() returns a false value.
        charset = message_part_charset(section, self.raw) or self.charset
        return utf8_encode_message_part(section, self.raw, charset)

    self.body_plain = u''
    self.body_html = u''

    for plain_part in typed_subpart_iterator(self.raw, 'text', 'plain'):
        decoded = _decode(plain_part)
        if is_encoding_error(decoded):
            self.encoding_error = decoded
        else:
            self.body_plain += decoded

    for html_part in typed_subpart_iterator(self.raw, 'text', 'html'):
        decoded = _decode(html_part)
        if is_encoding_error(decoded):
            self.encoding_error = decoded
        else:
            self.body_html += decoded

    self.has_built_body_strings = True
def get_pending_mail(userdesc, perms, mlist, id, raw=0):
    """ Get informations about a given mail moderation.
            @mlist
            @lock
            @admin
    """
    # NOTE(review): the @-tags in the docstring look like markers read by
    # the calling framework, so the docstring is left untouched -- confirm.
    record = mlist.GetRecord(int(id))
    ptime, sender, subject, reason, filename, msgdata = record
    fpath = os.path.join(mm_cfg.DATA_DIR, filename)
    size = os.path.getsize(fpath)
    msg = readMessage(fpath)

    # Raw mode short-circuits: the whole message, quoted.
    if raw:
        return quote(str(msg))

    def _collect(subtype):
        # Gather the non-None payloads of every text/<subtype> part, then
        # quote each of them.
        payloads = []
        for part in typed_subpart_iterator(msg, 'text', subtype):
            content = part.get_payload()
            if content is not None:
                payloads.append(content)
        return [quote(payload) for payload in payloads]

    results_plain = _collect('plain')
    results_html = _collect('html')

    return {
        'id': id,
        'sender': quote(sender, True),
        'size': size,
        'subj': quote(subject, True),
        'stamp': ptime,
        'parts_plain': results_plain,
        'parts_html': results_html,
    }
def _makeMessageInfo(self, message):
    """Given an email.Message, return an object with subjectHeader,
    bodySummary and other header (as needed) attributes.  These objects
    are passed into appendMessages by onReview - passing email.Message
    objects directly uses too much memory.

    The returned object also has a ``score`` attribute: the score header
    formatted as a percentage string, "Err" when the header is not a
    number, or "?" when the header is absent.
    """
    subjectHeader = message["Subject"] or "(none)"
    headers = {"subject" : subjectHeader}
    for header in options["html_ui", "display_headers"]:
        headers[header.lower()] = (message[header] or "(none)")
    score = message[options["Headers", "score_header_name"]]
    if score:
        # the score might have the log info at the end
        op = score.find('(')
        if op >= 0:
            score = score[:op]
        try:
            score = "%.2f%%" % (float(score)*100,)
        except ValueError:
            # Hmm.  The score header should only contain a floating
            # point number.  What's going on here, then?
            score = "Err"  # Let the user know something is wrong.
    else:
        # If the lookup fails, this means that the "include_score"
        # option isn't activated. We have the choice here to either
        # calculate it now, which is pretty inefficient, since we have
        # already done so, or to admit that we don't know what it is.
        # We'll go with the latter.
        score = "?"
    try:
        # next() rather than .next() so the lookup also works on
        # iterators that only implement __next__.
        part = next(typed_subpart_iterator(message, 'text', 'plain'))
        text = part.get_payload()
    except StopIteration:
        try:
            part = next(typed_subpart_iterator(message, 'text', 'html'))
            text = part.get_payload()
            text, unused = tokenizer.crack_html_style(text)
            text, unused = tokenizer.crack_html_comment(text)
            text = tokenizer.html_re.sub(' ', text)
            text = '(this message only has an HTML body)\n' + text
        except StopIteration:
            text = '(this message has no text body)'
    if isinstance(text, list):
        # A list payload means the part is itself a container.
        text = "(this message is a digest of %s messages)" % (len(text))
    elif text is None:
        text = "(this message has no body)"
    else:
        # Replace the HTML entity itself; swapping one whitespace
        # character for another left "&nbsp;" in the summary.
        text = text.replace('&nbsp;', ' ')  # Else they'll be quoted
        text = re.sub(r'(\s)\s+', r'\1', text)  # Eg. multiple blank lines
        text = text.strip()

    class _MessageInfo:
        pass
    messageInfo = _MessageInfo()
    for headerName, headerValue in headers.items():
        headerValue = self._trimHeader(headerValue, 45, True)
        setattr(messageInfo, "%sHeader" % (headerName,), headerValue)
    messageInfo.score = score
    messageInfo.bodySummary = self._trimHeader(text, 200)
    return messageInfo
def _makeMessageInfo(self, message):
    """Given an email.Message, return an object with subjectHeader,
    bodySummary and other header (as needed) attributes.  These objects
    are passed into appendMessages by onReview - passing email.Message
    objects directly uses too much memory.

    The returned object also has a ``score`` attribute: the score header
    scaled to a percentage (a float), "Err" when the header is not a
    number, or "?" when the header is absent.
    """
    message.delNotations()
    subjectHeader = message["Subject"] or "(none)"
    headers = {"subject": subjectHeader}
    for header in options["html_ui", "display_headers"]:
        headers[header.lower()] = message[header] or "(none)"
    score = message[options["Headers", "score_header_name"]]
    if score:
        # Anything from the first "(" onwards is dropped before parsing.
        op = score.find("(")
        if op >= 0:
            score = score[:op]
        try:
            score = float(score) * 100
        except ValueError:
            score = "Err"  # Let the user know something is wrong.
    else:
        score = "?"
    try:
        # next() rather than .next() so the lookup also works on
        # iterators that only implement __next__.
        part = next(typed_subpart_iterator(message, "text", "plain"))
        text = part.get_payload()
    except StopIteration:
        try:
            part = next(typed_subpart_iterator(message, "text", "html"))
            text = part.get_payload()
            text, unused = tokenizer.crack_html_style(text)
            text, unused = tokenizer.crack_html_comment(text)
            text = tokenizer.html_re.sub(" ", text)
            text = _("(this message only has an HTML body)\n") + text
        except StopIteration:
            text = _("(this message has no text body)")
    if isinstance(text, list):
        # A list payload means the part is itself a container.
        text = _("(this message is a digest of %s messages)") % (len(text))
    elif text is None:
        text = _("(this message has no body)")
    else:
        # Replace the HTML entity itself; swapping one whitespace
        # character for another left "&nbsp;" in the summary.
        text = text.replace("&nbsp;", " ")  # Else they'll be quoted
        text = re.sub(r"(\s)\s+", r"\1", text)  # Eg. multiple blank lines
        text = text.strip()

    class _MessageInfo:
        pass

    messageInfo = _MessageInfo()
    for headerName, headerValue in headers.items():
        headerValue = self._trimHeader(headerValue, 45, True)
        setattr(messageInfo, "%sHeader" % (headerName,), headerValue)
    messageInfo.score = score
    messageInfo.bodySummary = self._trimHeader(text, 200)
    return messageInfo
def get_body(message):
    """Return the decoded body of ``message`` as a stripped unicode string.

    For a multipart message the text/plain, application/pgp-encrypted and
    application/octet-stream sub-parts are collected (grouped in that
    order), decoded and joined with newlines.  Otherwise the single
    payload is decoded.  Undecodable bytes are substituted ("replace").
    """
    if not message.is_multipart():
        raw = message.get_payload(decode=True)
        return unicode(raw, get_charset(message), "replace").strip()

    selected = list(typed_subpart_iterator(message, 'text', 'plain'))
    selected.extend(
        typed_subpart_iterator(message, 'application', 'pgp-encrypted'))
    selected.extend(
        typed_subpart_iterator(message, 'application', 'octet-stream'))

    chunks = []
    for part in selected:
        # The message-level charset is handed to get_charset() as its
        # second argument (presumably the fallback -- see get_charset).
        part_charset = get_charset(part, get_charset(message))
        raw = part.get_payload(decode=True)
        chunks.append(unicode(raw, part_charset, "replace"))
    return u"\n".join(chunks).strip()
def process_body( self, msg ):
    """Extract the plain-text body of ``msg``.

    Returns ``{"body": {"text": <unicode>}}``.  For multipart messages
    all text/plain parts are decoded and joined with newlines; otherwise
    the single payload is decoded.  Undecodable bytes are replaced, parts
    without a payload contribute an empty string, and an unknown charset
    name falls back to ASCII instead of raising LookupError.
    """
    def get_charset( msg, default="ascii" ):
        """Get the message charset"""
        if msg.get_content_charset():
            return msg.get_content_charset()
        if msg.get_charset():
            # get_charset() returns an email Charset object; the codec
            # machinery needs its name, which str() provides.
            return str(msg.get_charset())
        return default

    def _decode_part( part, charset ):
        payload = part.get_payload(decode=True)
        if payload is None:
            return u""
        try:
            return payload.decode(charset, "replace")
        except LookupError:
            # Charset name advertised by the sender is not a known codec.
            return payload.decode("ascii", "replace")

    if msg.is_multipart():
        default_charset = get_charset(msg)
        body = [_decode_part(part, get_charset(part, default_charset))
                for part in typed_subpart_iterator(msg, 'text', 'plain')]
        email_body = u"\n".join(body).strip()
    else:
        # if it is not multipart, the payload will be a string
        # representing the message body
        email_body = _decode_part(msg, get_charset(msg)).strip()
    return { "body" : { "text" : email_body } }
def get_body(self, format=""):
    """Return the body of ``self.msg`` as a UTF-8 encoded byte string.

    When the body is multipart, `format` is applicable:
      format = plain, text/plain parts only
      format = html, text/html parts only
      if no format is specified, all text/* parts are returned.
    The selected parts are stripped and joined with newlines.  For a
    non-multipart message `format` is ignored and the single payload is
    returned.  Undecodable bytes are replaced; an unknown charset name
    falls back to UTF-8.
    """
    def _decode_msg(msg):
        'decode one part of a msg'
        # Test for None explicitly: Message truthiness is based on the
        # number of headers, so a header-less part would look false.
        if msg is None:
            return b""
        payload = msg.get_payload(decode=True)
        if payload is None:
            return b""
        charset = msg.get_content_charset() or 'utf-8'
        try:
            text = payload.decode(charset, 'replace')
        except LookupError:
            text = payload.decode('utf-8', 'replace')
        return text.strip().encode('utf-8')

    if self.msg.is_multipart():
        # An empty format means "any text subtype".
        subtype = format.lower() or None
        parts = typed_subpart_iterator(self.msg, 'text', subtype)
        return b'\n'.join([_decode_msg(p) for p in parts])
    return _decode_msg(self.msg)
def get_body(self, format=""):
    """Return the body of ``self.msg`` as a UTF-8 encoded byte string.

    When the body is multipart, `format` is applicable:
      format = plain, text/plain parts only
      format = html, text/html parts only
      if no format is specified, all text/* parts are returned.
    The selected parts are stripped and joined with newlines.  For a
    non-multipart message `format` is ignored and the single payload is
    returned.  Undecodable bytes are replaced; an unknown charset name
    falls back to UTF-8.
    """
    def _decode_msg(msg):
        'decode one part of a msg'
        # Test for None explicitly: Message truthiness is based on the
        # number of headers, so a header-less part would look false.
        if msg is None:
            return b""
        payload = msg.get_payload(decode=True)
        if payload is None:
            return b""
        charset = msg.get_content_charset() or 'utf-8'
        try:
            text = payload.decode(charset, 'replace')
        except LookupError:
            text = payload.decode('utf-8', 'replace')
        return text.strip().encode('utf-8')

    if self.msg.is_multipart():
        # An empty format means "any text subtype".
        subtype = format.lower() or None
        parts = typed_subpart_iterator(self.msg, 'text', subtype)
        return b'\n'.join([_decode_msg(p) for p in parts])
    return _decode_msg(self.msg)
def get_body(message):
    """Return the body of ``message`` as a stripped unicode string.

    Multipart messages contribute only their text/plain parts, joined by
    newlines; any other message contributes its single decoded payload.
    Bytes that cannot be decoded are replaced.
    """
    if not message.is_multipart():
        # Not multipart: the payload is one string holding the whole body.
        payload = message.get_payload(decode=True)
        return unicode(payload, get_charset(message), "replace").strip()

    # Multipart: plain text version only.
    pieces = []
    for section in typed_subpart_iterator(message, 'text', 'plain'):
        section_charset = get_charset(section, get_charset(message))
        section_bytes = section.get_payload(decode=True)
        pieces.append(unicode(section_bytes, section_charset, "replace"))
    joined = u"\n".join(pieces)
    return joined.strip()
def extract_multipart(self, message):
    """Summarise the parts of a multipart ``message``.

    Returns ``(texts, html, images, videos, applications)`` where
    ``texts`` is every text/* part decoded and joined by newlines,
    ``html`` counts the text parts whose subtype is not "plain", and the
    remaining three values count image/*, video/* and application/*
    parts.
    """
    decoded_parts = []
    non_plain_count = 0
    for section in typed_subpart_iterator(message):
        codec = fixaliases_charset(get_charset(section, get_charset(message)))
        if section.get_content_subtype() != "plain":
            non_plain_count = non_plain_count + 1
        try:
            decoded = unicode(section.get_payload(decode=True), codec, "replace")
        except LookupError:
            # The advertised charset is not a known codec.
            decoded = unicode(section.get_payload(decode=True), "utf8", "replace")
        decoded_parts.append(decoded)

    def _count(maintype):
        # Number of sub-parts whose main MIME type is ``maintype``.
        return len(list(typed_subpart_iterator(message, maintype)))

    texts = "\n".join(decoded_parts)
    images = _count("image")
    videos = _count("video")
    applications = _count("application")
    return (texts, non_plain_count, images, videos, applications)
def __init__(self, context, message): # -> none
    """Extract the bits of interest from an RFC2822 message string.

    context should be a wiki page.  message is the complete message
    source as a string.  The parsed message, the decoded subject, the
    sender and recipient addresses and the UTF-8 encoded text body are
    stored as attributes on self.

    This perhaps should do the isJunk test up front to avoid
    unnecessary resource usage.
    """
    DEBUG('mailin.py processing incoming message:\n%s' % message)
    self.context = context
    self.original = message
    self.msg = email.message_from_string(self.original)
    self.date = self.msg['Date']
    # flatten a multi-line subject into one line
    s = re.sub('\n', '', self.msg.get('Subject', ''))
    # convert the possibly RFC2047-encoded subject to unicode.
    # Only the first encoded part is used if there is more than one.
    # misencoded subjects are ignored.
    (s, enc) = decode_header(s)[0]
    try:
        self.subject = tounicode(s, enc or 'ascii')
    except UnicodeDecodeError:
        self.subject = ''
    self.realSubject = re.sub(r'.*?\[.*?\] ?(.*)', r'\1', self.subject)
    self.messageid = self.msg.get('Message-id', '')
    self.inreplyto = self.msg.get('In-reply-to', '')
    self.From = self.msg.get('From')
    self.FromRealName = parseaddr(self.From)[0]
    self.FromEmail = parseaddr(self.From)[1]
    self.FromUserName = (self.FromRealName or
                         re.sub(r'@.*$', r'', self.FromEmail))
    self.sender = self.msg.get('Sender')
    self.senderEmail = (self.sender and
                        parseaddr(self.sender)[1]) or None
    tos = self.msg.get_all('to', [])
    ccs = self.msg.get_all('cc', [])
    resent_tos = self.msg.get_all('resent-to', [])
    resent_ccs = self.msg.get_all('resent-cc', [])
    self.recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)

    # mailing list support
    # XXX x-beenthere is mailman-specific - need to support ezmlm & others here
    #self.xbeenthere = (self.msg.get('X-BeenThere') or
    #                   re.search(r'[^\s<]+@[^\s>]+',self.msg.get('Delivered-To')).group())
    # ..Type Error - configured ezmlm to provide beenthere instead (?)
    self.xbeenthere = self.msg.get('X-BeenThere')

    # the mailin body will be the message's first text/plain part
    # (or a null string if there is none or it's misencoded)
    try:
        firstplaintextpart = next(
            typed_subpart_iterator(self.msg, 'text', 'plain'))
        # as I understand it:
        # first decoding, from the content-transfer-encoding, eg quoted-printabe
        payload = firstplaintextpart.get_payload(decode=1)
        # second decoding, from utf8 or whatever to unicode.  The charset
        # must come from the text part itself: in a multipart message the
        # outer container usually declares none, which used to force
        # 'ascii' and discard every non-ascii body.
        charset = (firstplaintextpart.get_content_charset()
                   or self.msg.get_content_charset('ascii'))
        payloadutf8 = payload.decode(charset).encode('utf-8')
    except (StopIteration, UnicodeDecodeError, LookupError):
        # no text/plain part, undecodable bytes, or an unknown charset name
        payloadutf8 = ''
    self.body = cleanupBody(payloadutf8)
def check(msg):
    """Collect failed recipient addresses from the message/delivery-status
    parts of msg.

    Returns Stop as soon as a block whose Action starts with 'delayed' is
    seen; otherwise returns the unique addresses found in the
    Original-Recipient / Final-Recipient headers of blocks whose Action
    starts with 'fail'.
    """
    # Iterate over each message/delivery-status subpart
    addrs = []
    for part in typed_subpart_iterator(msg, 'message', 'delivery-status'):
        if not part.is_multipart():
            # Huh?
            continue
        # Each message/delivery-status contains a list of Message objects
        # which are the header blocks.  Iterate over those too.
        for msgblock in part.get_payload():
            # We try to dig out the Original-Recipient (which is optional) and
            # Final-Recipient (which is mandatory, but may not exactly match
            # an address on our list).  Some MTA's also use X-Actual-Recipient
            # as a synonym for Original-Recipient, but some apparently use
            # that for other purposes :(
            #
            # Also grok out Action so we can do something with that too.
            action = msgblock.get('action', '').lower()
            # Some MTAs have been observed that put comments on the action.
            if action.startswith('delayed'):
                return Stop
            if not action.startswith('fail'):
                # Some non-permanent failure, so ignore this block
                continue
            # params accumulates across both headers; it is not reset
            # between them.
            params = []
            foundp = False
            for header in ('original-recipient', 'final-recipient'):
                for k, v in msgblock.get_params([], header):
                    if k.lower() == 'rfc822':
                        foundp = True
                    else:
                        params.append(k)
                if foundp:
                    # Note that params should already be unquoted.
                    addrs.extend(params)
                    break
                else:
                    # MAS: This is a kludge, but SMTP-GATEWAY01.intra.home.dk
                    # has a final-recipient with an angle-addr and no
                    # address-type parameter at all. Non-compliant, but ...
                    for param in params:
                        if param.startswith('<') and param.endswith('>'):
                            addrs.append(param[1:-1])
    # Uniquify
    rtnaddrs = {}
    for a in addrs:
        if a is not None:
            # Keep only the address portion as the dictionary key.
            realname, a = parseaddr(a)
            rtnaddrs[a] = True
    return rtnaddrs.keys()
def __init__(self, context, message, ):
    """
    Pull the interesting fields out of an RFC2822 message string.

    The parsed message, its date, subject, ids, sender details and
    recipients are stored on self, and the first text/plain payload
    (empty if there is none) is cleaned up into self.body.

    This perhaps should do the isJunk test up front to avoid
    unnecessary resource usage.
    """
    BLATHER('mailin.py processing incoming message:\n%s' % message)
    #BLATHER('mailin.py processing incoming message')
    self.context = context
    self.original = message
    self.msg = email.message_from_string(self.original)
    parsed = self.msg

    self.date = parsed['Date']
    self.subject = re.sub(r'\n',r'',parsed.get('Subject',''))
    self.realSubject = re.sub(r'.*?\[.*?\] ?(.*)',r'\1',self.subject)
    self.messageid = parsed.get('Message-id','')
    self.inreplyto = parsed.get('In-reply-to','')

    self.From = parsed.get('From')
    realname, address = parseaddr(self.From)
    self.FromRealName = realname
    self.FromEmail = address
    # Fall back to the local part of the address when no real name is given.
    self.FromUserName = realname or re.sub(r'@.*$',r'',address)

    self.sender = parsed.get('Sender')
    self.senderEmail = (self.sender and parseaddr(self.sender)[1]) or None

    addressees = []
    for field in ('to', 'cc', 'resent-to', 'resent-cc'):
        addressees = addressees + parsed.get_all(field, [])
    self.recipients = getaddresses(addressees)

    # mailing list support
    # XXX x-beenthere is mailman-specific - need to support ezmlm & others here
    #self.xbeenthere = (self.msg.get('X-BeenThere') or
    #                   re.search(r'[^\s<]+@[^\s>]+',self.msg.get('Delivered-To')).group())
    # ..Type Error - configured ezmlm to provide beenthere instead (?)
    self.xbeenthere = parsed.get('X-BeenThere')

    # the iterator raises StopIteration if there's no text part
    plain_parts = typed_subpart_iterator(parsed, 'text', 'plain')
    try:
        plaintextpart = plain_parts.next().get_payload(decode=1)
    except StopIteration:
        plaintextpart = ''
    self.body = self.cleanupBody(plaintextpart)
def get_body(message):
    """Decode ``message`` into a stripped unicode body.

    For multipart messages only the text/plain sub-parts are used and
    they are joined by newlines.  Undecodable bytes are replaced.
    """
    def _as_text(part, charset):
        # Transfer-decode the payload, then decode it with ``charset``.
        return unicode(part.get_payload(decode=True),
                       encoding=charset, errors="replace")

    if not message.is_multipart():
        # payload is a string
        return _as_text(message, get_charset(message)).strip()

    # payload is an iterator of sub-parts
    fragments = []
    for subpart in typed_subpart_iterator(message, 'text', 'plain'):
        subpart_charset = get_charset(subpart, get_charset(message))
        fragments.append(_as_text(subpart, subpart_charset))
    return u"\n".join(fragments).strip()
def get_body(message):
    """Get the body of the email message

    Returns the transfer-decoded payload as a byte string, stripped of
    surrounding whitespace.  For a multipart message the text/plain
    parts are joined with newlines.  No charset decoding is performed.
    Parts without a payload are skipped.
    """
    if message.is_multipart():
        # get the plain text version only
        payloads = [part.get_payload(decode=True)
                    for part in typed_subpart_iterator(message, 'text', 'plain')]
        # A bytes separator keeps the join valid for byte payloads.
        return b"\n".join([p for p in payloads if p is not None]).strip()
    body = message.get_payload(decode=True)
    if body is None:
        return b""
    return body.strip()
def get_image(message):
    """Extract a JPEG image from ``message`` and hand it to return_image.

    If the message is multipart, the first image part whose subtype is
    ``jpeg`` (the registered MIME name) or ``jpg`` is used.  Otherwise
    the second element of the payload is used.  When nothing suitable is
    found an empty string is passed to return_image.  It would be quite
    easy to loop over other image types as well.
    """
    jpeg_subtypes = ('jpeg', 'jpg')
    if message.is_multipart():
        # get the FIRST JPEG image only
        for part in typed_subpart_iterator(message, 'image'):
            if part.get_content_subtype() in jpeg_subtypes:
                return return_image(part)
        return return_image("")
    payload = message.get_payload()
    # NOTE(review): for a non-multipart message the payload is normally a
    # string, so payload[1] is its second character -- confirm the intent.
    attachment = payload[1] if len(payload) > 1 else ""
    return return_image(attachment)
def get_image(message):
    """Check if message is multipart or not, then extract a JPEG image.

    If the message is multipart, the first image part whose subtype is
    ``jpeg`` (the registered MIME name) or ``jpg`` is passed to
    return_image.  Otherwise the second element of the payload is used.
    When nothing suitable is found an empty string is passed instead of
    raising IndexError.  It would be quite easy to loop over other image
    types as well.
    """
    jpeg_subtypes = ('jpeg', 'jpg')
    if message.is_multipart():
        # get the FIRST JPEG image only
        for part in typed_subpart_iterator(message, 'image'):
            if part.get_content_subtype() in jpeg_subtypes:
                return return_image(part)
        return return_image("")
    payload = message.get_payload()
    # NOTE(review): for a non-multipart message the payload is normally a
    # string, so payload[1] is its second character -- confirm the intent.
    attachment = payload[1] if len(payload) > 1 else ""
    return return_image(attachment)
def process_message(self, email_text, decode=True):
    """
    Loop over each text part of the message to get the content.

    email_text -- the complete message source as a string
    decode -- when true (the default) each payload is decoded according
              to its Content-Transfer-Encoding (e.g. base64); when false
              the payload is returned as stored

    Returns a list with one ``{content_type: payload}`` dict per text/*
    part, in message order.
    """
    msg = email.message_from_string(email_text)
    message_content = []
    for part in typed_subpart_iterator(msg, "text"):
        # Honour the caller's choice instead of always decoding.
        payload = part.get_payload(None, decode)
        message_content.append({part.get_content_type(): payload})
    return message_content
def get_content_with_type(self, message, contenttype='text/plain'):
    """
    Return the decoded text of every part of ``message`` whose MIME type
    equals ``contenttype`` ("maintype/subtype"), joined by newlines and
    stripped.  Undecodable bytes are replaced.
    """
    maintype, subtype = contenttype.split('/')
    matching = list(typed_subpart_iterator(message, maintype, subtype))

    def _decode(section):
        codec = self.get_charset(section, self.get_charset(message))
        return unicode(section.get_payload(decode=True), codec, "replace")

    decoded = [_decode(section) for section in matching]
    return u"\n".join(decoded).strip()
def __init__(self, mlist, msg, msgdata):
    """Collect the candidate mail commands found in msg.

    The decoded Subject: header (reduced to ASCII) becomes the first
    command, followed by the lines of the first text/plain part, up to
    mm_cfg.DEFAULT_MAIL_COMMANDS_MAX_LINES; any further lines are stored
    in self.ignored.
    """
    self.mlist = mlist
    self.msg = msg
    self.msgdata = msgdata
    # Only set returnaddr if the response is to go to someone other than
    # the address specified in the From: header (e.g. for the password
    # command).
    self.returnaddr = None
    self.commands = []
    self.results = []
    self.ignored = []
    self.lineno = 0
    self.subjcmdretried = 0
    self.respond = True
    # Extract the subject header and do RFC 2047 decoding.  Note that
    # Python 2.1's unicode() builtin doesn't call obj.__unicode__().
    subject = msg.get('subject', '')
    try:
        decoded_subject = make_header(decode_header(subject)).__unicode__()
        # TK: Currently we don't allow 8bit or multibyte in mail command.
        # MAS: However, an l10n 'Re:' may contain non-ascii so ignore it.
        ascii_subject = decoded_subject.encode('us-ascii', 'ignore')
    except (HeaderParseError, UnicodeError, LookupError):
        # We couldn't parse it so ignore the Subject header
        pass
    else:
        # Always process the Subject: header first
        self.commands.append(ascii_subject)
    # Find the first text/plain part
    part = None
    for part in typed_subpart_iterator(msg, 'text', 'plain'):
        break
    if part is None or part is not msg:
        # Either there was no text/plain part or we ignored some
        # non-text/plain parts.
        self.results.append(_('Ignoring non-text/plain MIME parts'))
    if part is None:
        # E.g the outer Content-Type: was text/html
        return
    # convert message into unicode because 'utf-8' message increasing
    mcset = part.get_content_charset('us-ascii')
    body = unicode(part.get_payload(decode=True), mcset, 'replace')
    # text/plain parts better have string payloads
    assert isinstance(body, (StringType, UnicodeType))
    lines = body.splitlines()
    # Use no more lines than specified
    limit = mm_cfg.DEFAULT_MAIL_COMMANDS_MAX_LINES
    self.commands.extend(lines[:limit])
    self.ignored.extend(lines[limit:])
def process_message(self, email_text, decode=True):
    """
    Loop over each text part of the message to get the content.

    email_text -- the complete message source as a string
    decode -- when true (the default) each payload is decoded according
              to its Content-Transfer-Encoding (e.g. base64); when false
              the payload is returned as stored

    Returns a list with one ``{content_type: payload}`` dict per text/*
    part, in message order.
    """
    msg = email.message_from_string(email_text)
    message_content = []
    for part in typed_subpart_iterator(msg, "text"):
        # Honour the caller's choice instead of always decoding.
        payload = part.get_payload(None, decode)
        message_content.append({part.get_content_type(): payload})
    return message_content
def __init__(self, mlist, msg, msgdata):
    """Collect the candidate mail commands found in msg.

    The decoded Subject: header (reduced to ASCII) becomes the first
    command, followed by the lines of the first text/plain part, up to
    mm_cfg.DEFAULT_MAIL_COMMANDS_MAX_LINES; any further lines are stored
    in self.ignored.
    """
    self.mlist = mlist
    self.msg = msg
    self.msgdata = msgdata
    # Only set returnaddr if the response is to go to someone other than
    # the address specified in the From: header (e.g. for the password
    # command).
    self.returnaddr = None
    self.commands = []
    self.results = []
    self.ignored = []
    self.lineno = 0
    self.subjcmdretried = 0
    self.respond = True
    # Extract the subject header and do RFC 2047 decoding.  Note that
    # Python 2.1's unicode() builtin doesn't call obj.__unicode__().
    subject = msg.get('subject', '')
    try:
        decoded_subject = make_header(decode_header(subject)).__unicode__()
        # TK: Currently we don't allow 8bit or multibyte in mail command.
        # MAS: However, an l10n 'Re:' may contain non-ascii so ignore it.
        ascii_subject = decoded_subject.encode('us-ascii', 'ignore')
    except (HeaderParseError, UnicodeError, LookupError):
        # We couldn't parse it so ignore the Subject header
        pass
    else:
        # Always process the Subject: header first
        self.commands.append(ascii_subject)
    # Find the first text/plain part
    part = None
    for part in typed_subpart_iterator(msg, 'text', 'plain'):
        break
    if part is None or part is not msg:
        # Either there was no text/plain part or we ignored some
        # non-text/plain parts.
        self.results.append(_('Ignoring non-text/plain MIME parts'))
    if part is None:
        # E.g the outer Content-Type: was text/html
        return
    body = part.get_payload(decode=True)
    # text/plain parts better have string payloads
    assert isinstance(body, (StringType, UnicodeType))
    lines = body.splitlines()
    # Use no more lines than specified
    limit = mm_cfg.DEFAULT_MAIL_COMMANDS_MAX_LINES
    self.commands.extend(lines[:limit])
    self.ignored.extend(lines[limit:])
def get_message_body(message):
    """Return the plain-text body of ``message`` as stripped unicode.

    Multipart messages yield their text/plain parts joined by newlines.
    A non-multipart message yields its payload only when its content
    type is text/plain; any other type yields an empty string.
    """
    # from http://ginstrom.com/scribbles/2007/11/19/parsing-multilingual-email-with-python/
    if message.is_multipart():
        fragments = []
        for section in typed_subpart_iterator(message, 'text', 'plain'):
            codec = get_message_charset(section, get_message_charset(message))
            fragments.append(
                unicode(section.get_payload(decode=True), codec, "replace"))
        return u"\n".join(fragments).strip()

    if message.get_content_type() != "text/plain":
        return ""

    text = unicode(message.get_payload(decode=True),
                   get_message_charset(message),
                   "replace")
    return text.strip()
def check(msg):
    """Collect failed recipient addresses from the message/delivery-status
    parts of msg.

    Returns Stop as soon as a block with Action 'delayed' is seen;
    otherwise returns the unique addresses named in the
    Original-Recipient / Final-Recipient headers of blocks whose Action
    is 'failed' or 'failure'.
    """
    found = []
    # Walk every message/delivery-status subpart
    for report in typed_subpart_iterator(msg, 'message', 'delivery-status'):
        if not report.is_multipart():
            # Huh?
            continue
        # The payload of a message/delivery-status part is a list of
        # Message objects, one per header block.
        for block in report.get_payload():
            # Original-Recipient is optional; Final-Recipient is mandatory
            # but may not exactly match an address on our list.  Some MTA's
            # also use X-Actual-Recipient as a synonym for
            # Original-Recipient, but some apparently use that for other
            # purposes :(
            #
            # Action decides whether the block matters at all.
            action = block.get('action', '').lower()
            if action == 'delayed':
                return Stop
            if action not in ('failed', 'failure'):
                # Some non-permanent failure, so ignore this block
                continue
            # params is shared by both headers; it is not reset between them.
            params = []
            foundp = False
            for header in ('original-recipient', 'final-recipient'):
                for key, unused in block.get_params([], header):
                    if key.lower() == 'rfc822':
                        foundp = True
                    else:
                        params.append(key)
                if not foundp:
                    continue
                # Note that params should already be unquoted.
                found.extend(params)
                break
    # Uniquify
    unique = {}
    for candidate in found:
        if candidate is None:
            continue
        unique[parseaddr(candidate)[1]] = True
    return unique.keys()
def __get_body(self, msg, charset):
    """Split ``msg`` into body texts and attached patches.

    Returns ``(body, patches)``, two lists of strings produced by
    ``to_unicode``.  A non-multipart message contributes its payload to
    ``body`` using ``charset``.  For a multipart message every text/plain
    part goes to ``body`` and every text/x-patch or text/x-diff part goes
    to ``patches``, each converted with the part's own charset.
    """
    texts, diffs = [], []

    if msg.is_multipart():
        # Include all the attached texts
        for section in typed_subpart_iterator(msg, 'text'):
            section_charset = section.get_content_charset()
            section_bytes = section.get_payload(decode=True)
            kind = section.get_content_subtype()

            if kind == 'plain':
                texts.append(to_unicode(section_bytes, section_charset))
            elif kind in ('x-patch', 'x-diff'):
                diffs.append(to_unicode(section_bytes, section_charset))
    else:
        # Non multipart messages should be straightforward
        texts.append(to_unicode(msg.get_payload(decode=True), charset))

    return texts, diffs
def unicode_email_body(email):
    """Return the text of ``email`` as a unicode string.

    For a multipart message the text/plain parts are decoded and
    concatenated without a separator; otherwise the single payload is
    decoded.  Parts that declare no charset are decoded as iso-8859-1,
    and parts without a payload are skipped.  The result is always
    unicode, including the empty case.

    Raises UnicodeDecodeError when a payload does not match its charset
    and LookupError when the charset name is unknown.
    """
    # If no encoding was specified we try iso-8859-1
    default_charset = "iso-8859-1"

    if email.is_multipart():
        parts = typed_subpart_iterator(email, "text", "plain")
    else:
        parts = [email]

    chunks = []
    for part in parts:
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        charset = part.get_content_charset() or default_charset
        chunks.append(payload.decode(charset))
    return u"".join(chunks)
def to_plaintext(msg):
    """Convert every text/html subpart of msg to text/plain, in place.

    Each HTML payload is written to a temporary file and run through
    mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND; the command's output replaces the
    payload and the part is retyped as text/plain.  A non-zero exit
    status from the command is logged but the output is still used.

    Returns 1 if at least one subpart was converted, otherwise 0.
    """
    changedp = 0
    for subpart in typed_subpart_iterator(msg, 'text', 'html'):
        # mkstemp() creates the file atomically; mktemp() only returned a
        # name, leaving a window in which another process could create it.
        fd, filename = tempfile.mkstemp('.html')
        try:
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(subpart.get_payload(decode=1))
            finally:
                # Close even when the write fails so the handle is not leaked.
                fp.close()
            cmd = os.popen(mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND %
                           {'filename': filename})
            plaintext = cmd.read()
            rtn = cmd.close()
            if rtn:
                syslog('error', 'HTML->text/plain error: %s', rtn)
        finally:
            try:
                os.unlink(filename)
            except OSError as e:
                # A file that is already gone is fine; anything else is not.
                if e.errno != errno.ENOENT:
                    raise
        # Now replace the payload of the subpart and twiddle the Content-Type:
        del subpart['content-transfer-encoding']
        subpart.set_payload(plaintext)
        subpart.set_type('text/plain')
        changedp = 1
    return changedp
def process(mlist, msg, msgdata):
    """Mark msg as approved when it carries a valid list password.

    The password is taken from an Approved/Approve/X-Approved/X-Approve
    header or, failing that, from a pseudo-header on the first non-blank
    line of the first text/plain part.  A pseudo-header line is removed
    from that part, and the same text is removed from every other text/*
    part in which it is found.

    Sets msgdata['approved'] and msgdata['adminapproved'] when the
    password authenticates as list poster, moderator or admin.

    Raises Errors.RejectMessage when the password can only be found in a
    part after stripping its HTML tags (so it cannot be removed safely),
    and Errors.LoopError when the list address already appears in an
    X-BeenThere header.
    """
    # Short circuits
    # Do not short circuit.  The problem is SpamDetect comes before Approve.
    # Suppose a message with an Approved: header is held by SpamDetect (or
    # any other handler that might come before Approve) and then approved
    # by a moderator.  When the approved message reaches Approve in the
    # pipeline, we still need to remove the Approved: (pseudo-)header, so
    # we can't short circuit.
    #if msgdata.get('approved'):
        # Digests, Usenet postings, and some other messages come pre-approved.
        # TBD: we may want to further filter Usenet messages, so the test
        # above may not be entirely correct.
        #return
    # See if the message has an Approved or Approve header with a valid
    # list-moderator, list-admin.  Also look at the first non-whitespace line
    # in the file to see if it looks like an Approved header.  We are
    # specifically /not/ allowing the site admins password to work here
    # because we want to discourage the practice of sending the site admin
    # password through email in the clear.
    missing = []
    for hdr in ('approved', 'approve', 'x-approved', 'x-approve'):
        passwd = msg.get(hdr, missing)
        if passwd is not missing:
            break
    if passwd is missing:
        # Find the first text/plain part in the message
        part = None
        stripped = False
        for part in typed_subpart_iterator(msg, 'text', 'plain'):
            break
        # XXX I'm not entirely sure why, but it is possible for the payload of
        # the part to be None, and you can't splitlines() on None.
        if part is not None and part.get_payload() is not None:
            lines = part.get_payload(decode=True).splitlines()
            line = ''
            # Stop at the first non-blank line; lineno is its index.
            for lineno, line in enumerate(lines):
                if line.strip():
                    break
            i = line.find(':')
            if i >= 0:
                name = line[:i]
                value = line[i+1:]
                if name.lower() in ('approve', 'approved',
                                    'x-approve', 'x-approved',
                                    ):
                    passwd = value.lstrip()
                    # Now strip the first line from the payload so the
                    # password doesn't leak.
                    del lines[lineno]
                    reset_payload(part, NL.join(lines))
                    stripped = True
        if stripped:
            # MAS: Bug 1181161 - Now try all the text parts in case it's
            # multipart/alternative with the approved line in HTML or other
            # text part.  We make a pattern from the Approved line and delete
            # it from all text/* parts in which we find it.  It would be
            # better to just iterate forward, but email compatability for pre
            # Python 2.2 returns a list, not a true iterator.  Also, there
            # are pathological MUAs that put the HTML part first.
            #
            # This will process all the multipart/alternative parts in the
            # message as well as all other text parts.  We shouldn't find the
            # pattern outside the mp/a parts, but if we do, it is probably
            # best to delete it anyway as it does contain the password.
            #
            # Make a pattern to delete.  We can't just delete a line because
            # line of HTML or other fancy text may include additional message
            # text.  This pattern works with HTML.  It may not work with rtf
            # or whatever else is possible.
            #
            # If we don't find the pattern in the decoded part, but we do
            # find it after stripping HTML tags, we don't know how to remove
            # it, so we just reject the post.
            #
            # The third alternative must be the literal HTML entity: a bare
            # whitespace character there is already covered by \s and lets
            # "Approved:&nbsp;password" in an HTML part go unmatched.
            pattern = name + ':(\xA0|\\s|&nbsp;)*' + re.escape(passwd)
            for part in typed_subpart_iterator(msg, 'text'):
                if part is not None and part.get_payload() is not None:
                    lines = part.get_payload(decode=True)
                    if re.search(pattern, lines):
                        reset_payload(part, re.sub(pattern, '', lines))
                    elif re.search(pattern, re.sub('(?s)<.*?>', '', lines)):
                        raise Errors.RejectMessage(REJECT)
    if passwd is not missing and mlist.Authenticate((mm_cfg.AuthListPoster,
                                                     mm_cfg.AuthListModerator,
                                                     mm_cfg.AuthListAdmin),
                                                    passwd):
        # BAW: should we definitely deny if the password exists but does not
        # match?  For now we'll let it percolate up for further determination.
        msgdata['approved'] = 1
        # Used by the Emergency module
        msgdata['adminapproved'] = 1
    # has this message already been posted to this list?
    beentheres = [s.strip().lower() for s in msg.get_all('x-beenthere', [])]
    if mlist.GetListEmail().lower() in beentheres:
        raise Errors.LoopError
def replace(self, find, replace, trash_folder, callback=None):
    """Performs a body-wide string search and replace

    Note that this search-and-replace is pretty dumb, and will fail
    in, for example, HTML messages where HTML tags would alter the search
    string.

    Only the text/plain and text/html parts of self.raw are touched.  Each
    part is re-encoded with the Content-Transfer-Encoding it advertised
    (quoted-printable when it advertised none) and the message is then
    saved through self.save.

    Args:
        find         -- the search term to look for as a string, or a tuple
                        of items to replace with corresponding items in the
                        replace tuple
        replace      -- the string to replace instances of the "find" term
                        with, or a tuple of terms to replace the
                        corresponding strings in the find tuple
        trash_folder -- the name of the folder / label that is, in the
                        current account, the trash container
        callback     -- optional; passed to _cmd / _cmd_cb together with
                        the result

    Returns:
        True on success, and in all other instances an error object
    """
    def _set_content_transfer_encoding(part, encoding):
        # Drop any existing header first so that add_header() does not
        # leave two Content-Transfer-Encoding headers on the part.
        try:
            del part['Content-Transfer-Encoding']
        except:
            ""
        part.add_header('Content-Transfer-Encoding', encoding)

    valid_content_types = ('plain', 'html')

    for valid_type in valid_content_types:

        for part in typed_subpart_iterator(self.raw, 'text', valid_type):

            section_encoding = part['Content-Transfer-Encoding']

            # If the message section doesn't advertise an encoding,
            # then default to quoted printable. Otherwise the module
            # will default to base64, which can cause problems
            if not section_encoding:
                section_encoding = "quoted-printable"
            else:
                section_encoding = section_encoding.lower()

            section_charset = message_part_charset(part, self.raw)
            new_payload_section = utf8_encode_message_part(
                part, self.raw, section_charset)

            # A decoding failure aborts the whole operation: the error is
            # remembered on self and handed to the callback.
            if is_encoding_error(new_payload_section):
                self.encoding_error = new_payload_section
                return _cmd(callback, self.encoding_error)

            # Sequence form: find[i] is replaced by replace[i], in order.
            if isinstance(find, tuple) or isinstance(find, list):
                for i in range(0, len(find)):
                    new_payload_section = new_payload_section.replace(
                        find[i], replace[i])
            else:
                new_payload_section = new_payload_section.replace(
                    find, replace)

            # NOTE(review): part._orig_charset and part._normalized are not
            # set anywhere in this function; presumably
            # utf8_encode_message_part() attaches them -- confirm.
            new_payload_section = new_payload_section.encode(
                part._orig_charset, errors="replace")

            # Re-apply the transfer encoding the part started with.
            if section_encoding == "quoted-printable":
                new_payload_section = encodestring(new_payload_section,
                                                   quotetabs=0)
                part.set_payload(new_payload_section, part._orig_charset)
                _set_content_transfer_encoding(part, "quoted-printable")
            elif section_encoding == "base64":
                part.set_payload(new_payload_section, part._orig_charset)
                ENC.encode_base64(part)
                _set_content_transfer_encoding(part, "base64")
            elif section_encoding in ('7bit', '8bit'):
                part.set_payload(new_payload_section, part._orig_charset)
                ENC.encode_7or8bit(part)
                _set_content_transfer_encoding(part, section_encoding)
            elif section_encoding == "binary":
                part.set_payload(new_payload_section, part._orig_charset)
                part['Content-Transfer-Encoding'] = 'binary'
                _set_content_transfer_encoding(part, 'binary')

            # Remove the helper attributes from the part again.
            del part._normalized
            del part._orig_charset

    def _on_save(was_success):
        return _cmd(callback, was_success)

    return _cmd_cb(self.save, _on_save, bool(callback), trash_folder)
def process(mlist, msg, msgdata):
    """Mark a message as approved when it carries a valid approval password.

    The password is read from an Approved, Approve, X-Approved or X-Approve
    header or, when no such header exists, from a pseudo-header on the first
    non-blank line of the first text/plain part.  A pseudo-header line is
    removed from the message so the password does not leak.

    Args:
        mlist: the list object; ``Authenticate()`` and ``GetListEmail()``
            are called on it.
        msg: the message being processed; its text parts may be rewritten
            in place.
        msgdata: metadata dictionary; ``'approved'`` and
            ``'adminapproved'`` are set to 1 when the password
            authenticates.

    Raises:
        Errors.RejectMessage: the pseudo-header is found in a text part only
            after HTML tags are stripped, so it cannot be removed safely.
        Errors.LoopError: an X-BeenThere header already names this list.
    """
    # Do not short circuit on msgdata.get('approved') here, even though
    # digests, Usenet postings, and some other messages come pre-approved.
    # The problem is SpamDetect comes before Approve.  Suppose a message with
    # an Approved: header is held by SpamDetect (or any other handler that
    # might come before Approve) and then approved by a moderator.  When the
    # approved message reaches Approve in the pipeline, we still need to
    # remove the Approved: (pseudo-)header, so we can't short circuit.
    #
    # See if the message has an Approved or Approve header with a valid
    # list-moderator, list-admin.  Also look at the first non-whitespace line
    # in the file to see if it looks like an Approved header.  We are
    # specifically /not/ allowing the site admins password to work here
    # because we want to discourage the practice of sending the site admin
    # password through email in the clear.
    missing = []
    for hdr in ('approved', 'approve', 'x-approved', 'x-approve'):
        passwd = msg.get(hdr, missing)
        if passwd is not missing:
            break
    if passwd is missing:
        # Find the first text/plain part in the message
        part = None
        stripped = False
        for part in typed_subpart_iterator(msg, 'text', 'plain'):
            break
        # XXX It is possible for the payload of the part to be None, and you
        # can't splitlines() on None.
        if part is not None and part.get_payload() is not None:
            lines = part.get_payload(decode=True).splitlines()
            line = ''
            # Stop on the first non-blank line; lineno/line keep its
            # position and text after the loop.
            for lineno, line in enumerate(lines):
                if line.strip():
                    break
            i = line.find(':')
            if i >= 0:
                name = line[:i]
                value = line[i + 1:]
                if name.lower() in (
                        'approve', 'approved', 'x-approve', 'x-approved'):
                    passwd = value.lstrip()
                    # Now strip the first line from the payload so the
                    # password doesn't leak.
                    del lines[lineno]
                    reset_payload(part, NL.join(lines))
                    stripped = True
        if stripped:
            # MAS: Bug 1181161 - Now try all the text parts in case it's
            # multipart/alternative with the approved line in HTML or other
            # text part.  We make a pattern from the Approved line and delete
            # it from all text/* parts in which we find it.  There are
            # pathological MUAs that put the HTML part first, so every text
            # part is examined rather than only those after the plain part.
            #
            # This will process all the multipart/alternative parts in the
            # message as well as all other text parts.  We shouldn't find the
            # pattern outside the mp/a parts, but if we do, it is probably
            # best to delete it anyway as it does contain the password.
            #
            # Make a pattern to delete.  We can't just delete a line because
            # a line of HTML or other fancy text may include additional
            # message text.  This pattern works with HTML.  It may not work
            # with rtf or whatever else is possible.
            #
            # The separator between the name and the password may be
            # whitespace, a raw \xA0 character, or the HTML entity &nbsp;.
            # The entity alternative is what lets the pattern match HTML
            # parts; a literal space there would be redundant with \s.
            pattern = name + ':(\xA0|\\s|&nbsp;)*' + re.escape(passwd)
            for part in typed_subpart_iterator(msg, 'text'):
                if part is not None and part.get_payload() is not None:
                    lines = part.get_payload(decode=True)
                    if re.search(pattern, lines):
                        reset_payload(part, re.sub(pattern, '', lines))
                    elif re.search(pattern, re.sub('(?s)<.*?>', '', lines)):
                        # If we don't find the pattern in the decoded part,
                        # but we do find it after stripping HTML tags, we
                        # don't know how to remove it, so we just reject the
                        # post.
                        # NOTE(review): assumes REJECT is a single message
                        # value (not a tuple), so this call form matches the
                        # old two-expression raise -- confirm.
                        raise Errors.RejectMessage(REJECT)
    if passwd is not missing and mlist.Authenticate(
            (mm_cfg.AuthListPoster,
             mm_cfg.AuthListModerator,
             mm_cfg.AuthListAdmin),
            passwd):
        # BAW: should we definitely deny if the password exists but does not
        # match?  For now we'll let it percolate up for further determination.
        msgdata['approved'] = 1
        # Used by the Emergency module
        msgdata['adminapproved'] = 1
    # has this message already been posted to this list?
    beentheres = [s.strip().lower() for s in msg.get_all('x-beenthere', [])]
    if mlist.GetListEmail().lower() in beentheres:
        raise Errors.LoopError