Exemple #1
0
    def _build_body_strings(self):
        """Populate ``self.body_plain`` and ``self.body_html`` once.

        Every text/plain and then every text/html subpart of ``self.raw`` is
        converted with ``utf8_encode_message_part`` and appended to the
        matching attribute.  A result flagged by ``is_encoding_error`` is
        stored in ``self.encoding_error`` instead of being appended.  The
        work is skipped when ``self.has_built_body_strings`` is already set.
        """
        if self.has_built_body_strings:
            return

        self.body_plain = u''
        self.body_html = u''

        # Plain parts are handled in a first pass, HTML parts in a second.
        for subtype, target in (('plain', 'body_plain'), ('html', 'body_html')):
            for subpart in typed_subpart_iterator(self.raw, 'text', subtype):
                # Prefer the charset found for the part, else the default.
                charset = message_part_charset(subpart, self.raw) or self.charset
                converted = utf8_encode_message_part(subpart, self.raw,
                                                     charset)
                if is_encoding_error(converted):
                    self.encoding_error = converted
                    continue
                setattr(self, target, getattr(self, target) + converted)

        self.has_built_body_strings = True
Exemple #2
0
def get_pending_mail(userdesc, perms, mlist, id, raw=0):
    """ Get informations about a given mail moderation.
            @mlist
            @lock
            @admin
    """
    # NOTE(review): the docstring is kept verbatim because its @-tags look
    # like markers read by the surrounding framework -- confirm.
    record = mlist.GetRecord(int(id))
    ptime, sender, subject, reason, filename, msgdata = record
    fpath = os.path.join(mm_cfg.DATA_DIR, filename)
    size = os.path.getsize(fpath)
    msg = readMessage(fpath)

    if raw:
        # Raw mode: the whole stored message, stringified and quoted.
        return quote(str(msg))

    def quoted_payloads(subtype):
        # Quoted payloads of every text/<subtype> part; None is skipped.
        found = []
        for part in typed_subpart_iterator(msg, 'text', subtype):
            payload = part.get_payload()
            if payload is not None:
                found.append(payload)
        return map(quote, found)

    results_plain = quoted_payloads('plain')
    results_html = quoted_payloads('html')

    info = {}
    info['id'] = id
    info['sender'] = quote(sender, True)
    info['size'] = size
    info['subj'] = quote(subject, True)
    info['stamp'] = ptime
    info['parts_plain'] = results_plain
    info['parts_html'] = results_html
    return info
Exemple #3
0
def get_pending_mail(userdesc, perms, mlist, id, raw=0):
    """ Get informations about a given mail moderation.
            @mlist
            @lock
            @admin
    """
    # NOTE(review): docstring left byte-identical; its @-tags may be parsed
    # by the caller -- confirm before rewording it.
    ptime, sender, subject, reason, filename, msgdata = \
        mlist.GetRecord(int(id))
    fpath = os.path.join(mm_cfg.DATA_DIR, filename)
    size = os.path.getsize(fpath)
    msg = readMessage(fpath)
    if raw:
        # Caller asked for the complete message rather than a summary.
        return quote(str(msg))

    # Gather the payloads of the text/plain parts, then quote each of them.
    plain_payloads = [part.get_payload()
                      for part in typed_subpart_iterator(msg, 'text', 'plain')]
    results_plain = map(quote,
                        [c for c in plain_payloads if c is not None])

    # Same treatment for the text/html parts.
    html_payloads = [part.get_payload()
                     for part in typed_subpart_iterator(msg, 'text', 'html')]
    results_html = map(quote,
                       [c for c in html_payloads if c is not None])

    return dict([
        ('id', id),
        ('sender', quote(sender, True)),
        ('size', size),
        ('subj', quote(subject, True)),
        ('stamp', ptime),
        ('parts_plain', results_plain),
        ('parts_html', results_html),
    ])
Exemple #4
0
    def _makeMessageInfo(self, message):
        """Given an email.Message, return a lightweight summary object.

        The returned object has one ``<name>Header`` attribute for the
        subject and for every header named in the ``html_ui`` /
        ``display_headers`` option (each trimmed with ``self._trimHeader``),
        plus ``score`` and ``bodySummary`` attributes.  These objects are
        passed into appendMessages by onReview - passing email.Message
        objects directly uses too much memory.
        """
        subjectHeader = message["Subject"] or "(none)"
        headers = {"subject" : subjectHeader}
        for header in options["html_ui", "display_headers"]:
            headers[header.lower()] = (message[header] or "(none)")
        score = message[options["Headers", "score_header_name"]]
        if score:
            # the score might have the log info at the end
            op = score.find('(')
            if op >= 0:
                score = score[:op]
            try:
                score = "%.2f%%" % (float(score)*100,)
            except ValueError:
                # Hmm.  The score header should only contain a floating
                # point number.  What's going on here, then?
                score = "Err"  # Let the user know something is wrong.
        else:
            # If the lookup fails, this means that the "include_score"
            # option isn't activated. We have the choice here to either
            # calculate it now, which is pretty inefficient, since we have
            # already done so, or to admit that we don't know what it is.
            # We'll go with the latter.
            score = "?"
        # Prefer the first text/plain part; fall back to the first text/html
        # part with its styles, comments and tags stripped.
        try:
            part = typed_subpart_iterator(message, 'text', 'plain').next()
            text = part.get_payload()
        except StopIteration:
            try:
                part = typed_subpart_iterator(message, 'text', 'html').next()
                text = part.get_payload()
                text, unused = tokenizer.crack_html_style(text)
                text, unused = tokenizer.crack_html_comment(text)
                text = tokenizer.html_re.sub(' ', text)
                text = '(this message only has an HTML body)\n' + text
            except StopIteration:
                text = '(this message has no text body)'
        if isinstance(text, list):
            # A list payload means the part itself holds sub-messages.
            text = "(this message is a digest of %s messages)" % (len(text))
        elif text is None:
            text = "(this message has no body)"
        else:
            # Replace the HTML entity itself; replacing ' ' with ' ' (as the
            # code previously did) was a no-op.
            text = text.replace('&nbsp;', ' ')      # Else they'll be quoted
            text = re.sub(r'(\s)\s+', r'\1', text)  # Eg. multiple blank lines
            text = text.strip()

        class _MessageInfo:
            pass
        messageInfo = _MessageInfo()
        for headerName, headerValue in headers.items():
            headerValue = self._trimHeader(headerValue, 45, True)
            setattr(messageInfo, "%sHeader" % (headerName,), headerValue)
        messageInfo.score = score
        messageInfo.bodySummary = self._trimHeader(text, 200)
        return messageInfo
Exemple #5
0
    def _makeMessageInfo(self, message):
        """Given an email.Message, return a lightweight summary object.

        The returned object has one ``<name>Header`` attribute for the
        subject and for every header named in the ``html_ui`` /
        ``display_headers`` option (each trimmed with ``self._trimHeader``),
        plus ``score`` and ``bodySummary`` attributes.  These objects are
        passed into appendMessages by onReview - passing email.Message
        objects directly uses too much memory.
        """
        message.delNotations()
        subjectHeader = message["Subject"] or "(none)"
        headers = {"subject": subjectHeader}
        for header in options["html_ui", "display_headers"]:
            headers[header.lower()] = message[header] or "(none)"
        score = message[options["Headers", "score_header_name"]]
        if score:
            # Anything from the first "(" onwards is cut off before parsing.
            op = score.find("(")
            if op >= 0:
                score = score[:op]
            try:
                score = float(score) * 100
            except ValueError:
                score = "Err"  # Let the user know something is wrong.
        else:
            # No score header on this message.
            score = "?"
        # Prefer the first text/plain part; fall back to the first text/html
        # part with its styles, comments and tags stripped.
        try:
            part = typed_subpart_iterator(message, "text", "plain").next()
            text = part.get_payload()
        except StopIteration:
            try:
                part = typed_subpart_iterator(message, "text", "html").next()
                text = part.get_payload()
                text, unused = tokenizer.crack_html_style(text)
                text, unused = tokenizer.crack_html_comment(text)
                text = tokenizer.html_re.sub(" ", text)
                text = _("(this message only has an HTML body)\n") + text
            except StopIteration:
                text = _("(this message has no text body)")
        if isinstance(text, list):
            # A list payload means the part itself holds sub-messages.
            text = _("(this message is a digest of %s messages)") % (len(text))
        elif text is None:
            text = _("(this message has no body)")
        else:
            # Replace the HTML entity itself; replacing " " with " " (as the
            # code previously did) was a no-op.
            text = text.replace("&nbsp;", " ")  # Else they'll be quoted
            text = re.sub(r"(\s)\s+", r"\1", text)  # Eg. multiple blank lines
            text = text.strip()

        class _MessageInfo:
            pass

        messageInfo = _MessageInfo()
        for headerName, headerValue in headers.items():
            headerValue = self._trimHeader(headerValue, 45, True)
            setattr(messageInfo, "%sHeader" % (headerName,), headerValue)
        messageInfo.score = score
        messageInfo.bodySummary = self._trimHeader(text, 200)
        return messageInfo
Exemple #6
0
def get_body(message):
    """Return the stripped unicode body of *message*.

    For a multipart message the text/plain, application/pgp-encrypted and
    application/octet-stream subparts (in that order) are decoded and joined
    with newlines.  Otherwise the single payload is decoded.  Undecodable
    bytes are replaced rather than raising.
    """
    if not message.is_multipart():
        raw = message.get_payload(decode=True)
        return unicode(raw, get_charset(message), "replace").strip()

    wanted_types = (
        ('text', 'plain'),
        ('application', 'pgp-encrypted'),
        ('application', 'octet-stream'),
    )
    # Collect every matching part first, one content type after another.
    selected = []
    for maintype, subtype in wanted_types:
        selected.extend(typed_subpart_iterator(message, maintype, subtype))

    decoded = []
    for part in selected:
        # A part's own charset wins; the message charset is the fallback.
        charset = get_charset(part, get_charset(message))
        payload = part.get_payload(decode=True)
        decoded.append(unicode(payload, charset, "replace"))
    return u"\n".join(decoded).strip()
  def process_body( self, msg ):
    """Return ``{"body": {"text": <unicode body>}}`` for *msg*.

    Multipart messages contribute their text/plain subparts, joined with
    newlines; other messages contribute their single decoded payload.
    The text is stripped and undecodable bytes are replaced.
    """
    def get_charset( msg, default="ascii" ):
      """Get the message charset"""
      return msg.get_content_charset() or msg.get_charset() or default

    if not msg.is_multipart():
      # A non-multipart payload is one string holding the whole body.
      raw = msg.get_payload(decode=True)
      text = unicode(raw, get_charset(msg), "replace").strip()
      return { "body" : { "text" : text } }

    pieces = []
    for part in list(typed_subpart_iterator(msg,'text','plain')):
      # The part's charset wins; the message charset is the fallback.
      charset = get_charset(part, get_charset(msg))
      pieces.append(unicode(part.get_payload(decode=True), charset, "replace"))
    return { "body" : { "text" : u"\n".join(pieces).strip() } }
Exemple #8
0
    def get_body(self, format=""):
        """Return the body of ``self.msg`` as a UTF-8 encoded string.

        For a multipart message each selected text part is decoded, stripped
        and the results are joined with newlines.  ``format`` selects the
        parts: ``"plain"`` keeps only text/plain, ``"html"`` keeps only
        text/html, and any other value (including the default ``""``) keeps
        every text/* part.  ``format`` is ignored for a non-multipart
        message, whose single payload is always returned.
        """

        def _decode_msg(msg):
            'decode one part of a msg'
            # NOTE(review): if these are stdlib email Message objects, their
            # truthiness follows the header count, so a part without any
            # header yields "" here -- confirm this is intended.
            if msg:
                return unicode(msg.get_payload(decode=True),
                               msg.get_content_charset() or 'utf-8',
                               'replace').strip().encode('utf-8')
            else:
                return ""

        if self.msg.is_multipart():
            # Only the two documented values narrow the selection; anything
            # else keeps the previous "all text parts" behaviour.
            subtype = format if format in ('plain', 'html') else None
            parts = typed_subpart_iterator(self.msg, 'text', subtype)
            # Joining an empty list already gives '', so no emptiness test
            # is needed (a generator object is always truthy anyway).
            return '\n'.join([_decode_msg(p) for p in parts])

        else:
            body = unicode(self.msg.get_payload(decode=True),
                           self.msg.get_content_charset() or 'utf-8',
                           "replace")
            return body.strip().encode('utf-8')
Exemple #9
0
    def get_body(self, format=""):
        """Return the body of ``self.msg`` as a UTF-8 encoded string.

        For a multipart message each selected text part is decoded, stripped
        and the results are joined with newlines.  ``format`` selects the
        parts: ``"plain"`` keeps only text/plain, ``"html"`` keeps only
        text/html, and any other value (including the default ``""``) keeps
        every text/* part.  ``format`` is ignored for a non-multipart
        message, whose single payload is always returned.
        """

        def _decode_msg(msg):
            'decode one part of a msg'
            # NOTE(review): if these are stdlib email Message objects, their
            # truthiness follows the header count, so a part without any
            # header yields "" here -- confirm this is intended.
            if msg:
                return unicode(
                    msg.get_payload(decode=True),
                    msg.get_content_charset() or 'utf-8',
                    'replace'
                ).strip().encode('utf-8')
            else:
                return ""

        if self.msg.is_multipart():
            # Only the two documented values narrow the selection; anything
            # else keeps the previous "all text parts" behaviour.
            subtype = format if format in ('plain', 'html') else None
            parts = typed_subpart_iterator(self.msg, 'text', subtype)
            # Joining an empty list already gives '', so no emptiness test
            # is needed (a generator object is always truthy anyway).
            return '\n'.join([_decode_msg(p) for p in parts])

        else:
            body = unicode(self.msg.get_payload(decode=True),
                           self.msg.get_content_charset() or 'utf-8', "replace")
            return body.strip().encode('utf-8')
Exemple #10
0
def get_body(message):
    """Return the stripped unicode body of the email *message*.

    Multipart messages yield their text/plain subparts joined with
    newlines; any other message yields its single decoded payload.
    Undecodable bytes are replaced rather than raising.
    """
    if not message.is_multipart():
        # Non-multipart: the payload is one string holding the body.
        payload = message.get_payload(decode=True)
        return unicode(payload, get_charset(message), "replace").strip()

    # Multipart: only the plain text version is used.
    decoded = []
    for part in list(typed_subpart_iterator(message, 'text', 'plain')):
        # The part's charset wins; the message charset is the fallback.
        charset = get_charset(part, get_charset(message))
        decoded.append(
            unicode(part.get_payload(decode=True), charset, "replace"))
    return u"\n".join(decoded).strip()
Exemple #11
0
    def extract_multipart(self, message):
        """Summarise the parts of a multipart *message*.

        Returns ``(texts, html, images, videos, applications)``: the decoded
        text parts joined with newlines, the number of text parts whose
        subtype is not ``plain``, and the number of image, video and
        application parts.
        """
        decoded = []
        non_plain = 0
        for part in list(typed_subpart_iterator(message)):
            charset = fixaliases_charset(
                get_charset(part, get_charset(message)))
            if part.get_content_subtype() != "plain":
                non_plain += 1
            try:
                text = unicode(part.get_payload(decode=True), charset, "replace")
            except LookupError:
                # Codec name not known to Python: decode as UTF-8 instead.
                text = unicode(part.get_payload(decode=True), "utf8", "replace")
            decoded.append(text)

        texts = "\n".join(decoded)
        images, videos, applications = [
            len(list(typed_subpart_iterator(message, maintype)))
            for maintype in ("image", "video", "application")
        ]
        return (texts, non_plain, images, videos, applications)
Exemple #12
0
 def __init__(self, context, message):  # -> none
     """Extract the bits of interest from an RFC2822 message string.

     context should be a wiki page. This perhaps should do the isJunk
     test up front to avoid unnecessary resource usage.

     Sets the parsed message, date, subject, message ids, sender and
     recipient details, and ``self.body`` (the cleaned-up first text/plain
     part, or a cleaned-up empty string when it is missing or cannot be
     decoded).
     """
     DEBUG('mailin.py processing incoming message:\n%s' % message)
     self.context = context
     self.original = message
     self.msg = email.message_from_string(self.original)
     self.date = self.msg['Date']
     # flatten a multi-line subject into one line
     s = re.sub('\n', '', self.msg.get('Subject', ''))
     # convert the possibly RFC2047-encoded subject to unicode.
     # Only the first encoded part is used if there is more than one.
     # misencoded subjects are ignored.
     (s, enc) = decode_header(s)[0]
     try:
         self.subject = tounicode(s, enc or 'ascii')
     except UnicodeDecodeError:
         self.subject = ''
     self.realSubject = re.sub(r'.*?\[.*?\] ?(.*)', r'\1', self.subject)
     self.messageid = self.msg.get('Message-id', '')
     self.inreplyto = self.msg.get('In-reply-to', '')
     self.From = self.msg.get('From')
     self.FromRealName = parseaddr(self.From)[0]
     self.FromEmail = parseaddr(self.From)[1]
     self.FromUserName = (self.FromRealName
                          or re.sub(r'@.*$', r'', self.FromEmail))
     self.sender = self.msg.get('Sender')
     self.senderEmail = (self.sender and parseaddr(self.sender)[1]) or None
     tos = self.msg.get_all('to', [])
     ccs = self.msg.get_all('cc', [])
     resent_tos = self.msg.get_all('resent-to', [])
     resent_ccs = self.msg.get_all('resent-cc', [])
     self.recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
     # mailing list support
     # XXX x-beenthere is mailman-specific - need to support ezmlm & others here
     #self.xbeenthere = (self.msg.get('X-BeenThere') or
     #                   re.search(r'[^\s<]+@[^\s>]+',self.msg.get('Delivered-To')).group())
     # ..Type Error - configured ezmlm to provide beenthere instead (?)
     self.xbeenthere = self.msg.get('X-BeenThere')
     # the mailin body will be the message's first text/plain part
     # (or a null string if there is none or it's misencoded)
     try:
         firstplaintextpart = typed_subpart_iterator(
             self.msg, 'text', 'plain').next()
         # first decoding, from the content-transfer-encoding, eg
         # quoted-printable
         payload = firstplaintextpart.get_payload(decode=1)
         # second decoding, from utf8 or whatever to unicode.  The charset
         # declared on the text part itself comes first: the outer message
         # of a multipart mail normally declares none, which used to force
         # 'ascii' and discard any non-ASCII body.
         charset = (firstplaintextpart.get_content_charset()
                    or self.msg.get_content_charset('ascii'))
         payloadutf8 = payload.decode(charset).encode('utf-8')
     except (StopIteration, UnicodeDecodeError, LookupError):
         # No text/plain part, a misencoded body, or a charset name that
         # Python has no codec for (LookupError).
         payloadutf8 = ''
     self.body = cleanupBody(payloadutf8)
Exemple #13
0
def check(msg):
    """Collect failed recipient addresses from delivery-status reports.

    Every message/delivery-status subpart of *msg* is scanned block by
    block.  Returns ``Stop`` as soon as a block's Action starts with
    "delayed"; otherwise returns the unique addresses gathered from blocks
    whose Action starts with "fail".
    """
    candidates = []
    for report in typed_subpart_iterator(msg, 'message', 'delivery-status'):
        if not report.is_multipart():
            # Huh?
            continue
        # The payload of a message/delivery-status part is a list of
        # Message objects, one per header block.
        for block in report.get_payload():
            # Original-Recipient is optional, Final-Recipient is mandatory
            # (but may not exactly match an address on our list).  Some
            # MTAs put comments on the Action, hence the prefix tests.
            action = block.get('action', '').lower()
            if action.startswith('delayed'):
                return Stop
            if not action.startswith('fail'):
                # Some non-permanent failure, so ignore this block
                continue
            others = []
            saw_rfc822 = False
            for header in ('original-recipient', 'final-recipient'):
                for key, value in block.get_params([], header):
                    if key.lower() == 'rfc822':
                        saw_rfc822 = True
                    else:
                        others.append(key)
                if saw_rfc822:
                    # Note that the params should already be unquoted.
                    candidates.extend(others)
                    break
                # MAS: This is a kludge, but SMTP-GATEWAY01.intra.home.dk
                # has a final-recipient with an angle-addr and no
                # address-type parameter at all. Non-compliant, but ...
                candidates.extend([item[1:-1] for item in others
                                   if item.startswith('<')
                                   and item.endswith('>')])
    # Uniquify, keeping only the address half of each candidate.
    unique = {}
    for candidate in candidates:
        if candidate is None:
            continue
        unique[parseaddr(candidate)[1]] = True
    return unique.keys()
Exemple #14
0
def check(msg):
    """Return the failed recipients named in *msg*'s delivery-status parts.

    Each header block of every message/delivery-status subpart is examined.
    A block whose Action starts with "delayed" makes the function return
    ``Stop`` immediately; blocks whose Action does not start with "fail"
    are skipped.  The result holds each collected address once.
    """
    collected = []
    for status_part in typed_subpart_iterator(msg, 'message',
                                              'delivery-status'):
        if status_part.is_multipart():
            # Each message/delivery-status contains a list of Message
            # objects which are the header blocks.
            for headers in status_part.get_payload():
                # Some MTAs have been observed that put comments on the
                # action, so only its beginning is compared.
                action = headers.get('action', '').lower()
                if action.startswith('delayed'):
                    return Stop
                if action.startswith('fail'):
                    names = []
                    typed = False
                    for field in ('original-recipient', 'final-recipient'):
                        for name, unused in headers.get_params([], field):
                            if name.lower() != 'rfc822':
                                names.append(name)
                            else:
                                typed = True
                        if typed:
                            # Params should already be unquoted here.
                            collected.extend(names)
                            break
                        # Kludge for a non-compliant MTA that sends a bare
                        # angle-addr with no address-type parameter at all.
                        for name in names:
                            if name.startswith('<') and name.endswith('>'):
                                collected.append(name[1:-1])
    # Uniquify on the address half returned by parseaddr.
    seen = {}
    for entry in collected:
        if entry is not None:
            realname, address = parseaddr(entry)
            seen[address] = True
    return seen.keys()
Exemple #15
0
    def __init__(self,
                 context,
                 message,
                 ):
        """
        Parse an RFC2822 message string and keep the interesting pieces.

        Stores the parsed message, date, subject, message ids, sender and
        recipient details, and the cleaned-up first text/plain payload as
        ``self.body``.  This perhaps should do the isJunk test up front to
        avoid unnecessary resource usage.
        """
        BLATHER('mailin.py processing incoming message:\n%s' % message)
        self.context = context
        self.original = message
        msg = self.msg = email.message_from_string(self.original)

        # headers
        self.date = msg['Date']
        # a multi-line subject is flattened into one line
        self.subject = re.sub(r'\n', r'', msg.get('Subject', ''))
        self.realSubject = re.sub(r'.*?\[.*?\] ?(.*)', r'\1', self.subject)
        self.messageid = msg.get('Message-id', '')
        self.inreplyto = msg.get('In-reply-to', '')

        # who it is from
        self.From = msg.get('From')
        realname, address = parseaddr(self.From)
        self.FromRealName = realname
        self.FromEmail = address
        self.FromUserName = realname or re.sub(r'@.*$', r'', address)
        self.sender = msg.get('Sender')
        self.senderEmail = (self.sender and
                            parseaddr(self.sender)[1]) or None

        # who it is for
        recipient_headers = (msg.get_all('to', []) +
                             msg.get_all('cc', []) +
                             msg.get_all('resent-to', []) +
                             msg.get_all('resent-cc', []))
        self.recipients = getaddresses(recipient_headers)

        # mailing list support
        # XXX x-beenthere is mailman-specific - need to support ezmlm &
        # others here
        self.xbeenthere = msg.get('X-BeenThere')

        # the iterator raises StopIteration if there's no text part
        try:
            parts = typed_subpart_iterator(msg, 'text', 'plain')
            plaintextpart = parts.next().get_payload(decode=1)
        except StopIteration:
            plaintextpart = ''
        self.body = self.cleanupBody(plaintextpart)
Exemple #16
0
def get_body(message):
    """Return the stripped unicode body of *message*.

    A multipart message contributes its text/plain subparts, joined with
    newlines; any other message contributes its single decoded payload.
    Undecodable bytes are replaced rather than raising.
    """
    if not message.is_multipart():
        # payload is a string
        raw = message.get_payload(decode=True)
        text = unicode(raw, encoding=get_charset(message), errors="replace")
        return text.strip()

    # payload is an iterator of sub-parts
    chunks = []
    for subpart in list(typed_subpart_iterator(message, 'text', 'plain')):
        # The part's charset wins; the message charset is the fallback.
        charset = get_charset(subpart, get_charset(message))
        raw = subpart.get_payload(decode=True)
        chunks.append(unicode(raw, encoding=charset, errors="replace"))
    return u"\n".join(chunks).strip()
def get_body(message):
    """Get the body of the email message as a stripped byte string.

    For a multipart message only the text/plain subparts are used; their
    transfer-decoded payloads are joined with newlines.  Otherwise the
    single transfer-decoded payload is returned.  No charset decoding is
    performed.
    """
    if message.is_multipart():
        # get the plain text version only
        body = [part.get_payload(decode=True)
                for part in typed_subpart_iterator(message, 'text', 'plain')]
        # b"\n" equals "\n" on Python 2 and also joins the byte payloads
        # that get_payload(decode=True) returns on Python 3.
        return b"\n".join(body).strip()

    return message.get_payload(decode=True).strip()
def get_image(message):
    """Pick an image out of *message* and hand it to ``return_image``.

    For a multipart message the first ``image/jpg`` subpart is used (JPEG
    only in this version, but it would be quite easy to loop over any kinds
    of image types).  Otherwise the second element of the raw payload is
    used.  In both cases an empty string is passed to ``return_image`` when
    nothing suitable is found, and its result is returned.
    """
    if message.is_multipart():
        # get the FIRST image JPEG version only
        # NOTE(review): the registered MIME subtype for JPEG is "jpeg";
        # parts labelled image/jpeg are not matched here -- confirm that
        # matching only "jpg" is intended.
        image_part = ""
        for image_part in typed_subpart_iterator(message, 'image', 'jpg'):
            break
        return return_image(image_part)

    payload = message.get_payload()
    # Guard on the length rather than on `len(payload) is 0`: identity
    # tests on ints are unreliable, and a missing or one-element payload
    # has no index 1.
    attachment = payload[1] if payload and len(payload) > 1 else ""
    return return_image(attachment)
Exemple #19
0
def get_image(message):
    """Pick an image out of *message* and hand it to ``return_image``.

    A multipart message contributes its first ``image/jpg`` subpart (JPEG
    only in this version, but it would be quite easy to loop over any kinds
    of image types); IndexError is raised when there is none.  Any other
    message contributes the element at index 1 of its raw payload.
    """
    if not message.is_multipart():
        return return_image(message.get_payload()[1])

    # Only the FIRST matching image part is of interest.
    candidates = list(typed_subpart_iterator(message, 'image', 'jpg'))
    return return_image(candidates[0])
Exemple #20
0
  def process_message(self, email_text, decode=True):
    """
      Parse *email_text* and return the content of each text part.

      The result is a list with one single-entry dict per text/* part,
      mapping the part's content type to its transfer-decoded payload.
      The ``decode`` argument is accepted but not used.
    """
    parsed = email.message_from_string(email_text)
    text_parts = list(typed_subpart_iterator(parsed, "text"))
    # Passing True as the second argument decodes base64 if necessary.
    return [{part.get_content_type(): part.get_payload(None, True)}
            for part in text_parts]
Exemple #21
0
 def get_content_with_type(self, message, contenttype='text/plain'):
     """
     Return the stripped unicode text of the parts of a given content type.

     *contenttype* is split on '/' into a main type and a subtype; every
     matching subpart of *message* is decoded (undecodable bytes are
     replaced) and the results are joined with newlines.
     """
     maintype, subtype = contenttype.split('/')
     matching = list(typed_subpart_iterator(message, maintype, subtype))
     decoded = []
     for part in matching:
         # The part's charset wins; the message charset is the fallback.
         fallback = self.get_charset(message)
         charset = self.get_charset(part, fallback)
         payload = part.get_payload(decode=True)
         decoded.append(unicode(payload, charset, "replace"))
     return u"\n".join(decoded).strip()
 def __init__(self, mlist, msg, msgdata):
     """Collect the candidate command lines found in *msg*.

     The RFC 2047 decoded Subject: header (reduced to ASCII) becomes the
     first command.  The lines of the first text/plain part follow, up to
     mm_cfg.DEFAULT_MAIL_COMMANDS_MAX_LINES; the remaining lines are kept
     in ``self.ignored``.
     """
     self.mlist = mlist
     self.msg = msg
     self.msgdata = msgdata
     # returnaddr is only set if the response is to go to someone other
     # than the address specified in the From: header (e.g. for the
     # password command).
     self.returnaddr = None
     self.commands = []
     self.results = []
     self.ignored = []
     self.lineno = 0
     self.subjcmdretried = 0
     self.respond = True
     # Note that Python 2.1's unicode() builtin doesn't call
     # obj.__unicode__(), hence the explicit method call below.
     subject = msg.get('subject', '')
     try:
         decoded = make_header(decode_header(subject)).__unicode__()
         # TK: Currently we don't allow 8bit or multibyte in mail command.
         # MAS: However, an l10n 'Re:' may contain non-ascii so ignore it.
         # The Subject: header is always processed first.
         self.commands.append(decoded.encode('us-ascii', 'ignore'))
     except (HeaderParseError, UnicodeError, LookupError):
         # We couldn't parse it so ignore the Subject header
         pass
     # Find the first text/plain part
     first_plain = None
     for first_plain in typed_subpart_iterator(msg, 'text', 'plain'):
         break
     found = first_plain is not None
     if not (found and first_plain is msg):
         # Either there was no text/plain part or we ignored some
         # non-text/plain parts.
         self.results.append(_('Ignoring non-text/plain MIME parts'))
     if not found:
         # E.g the outer Content-Type: was text/html
         return
     # The body is converted to unicode using the part's declared charset.
     charset = first_plain.get_content_charset('us-ascii')
     body = unicode(first_plain.get_payload(decode=True), charset, 'replace')
     # text/plain parts better have string payloads
     assert isinstance(body, (StringType, UnicodeType))
     # Use no more lines than specified
     limit = mm_cfg.DEFAULT_MAIL_COMMANDS_MAX_LINES
     lines = body.splitlines()
     self.commands.extend(lines[:limit])
     self.ignored.extend(lines[limit:])
Exemple #23
0
    def process_message(self, email_text, decode=True):
        """
        Parse *email_text* and return the content of each text part.

        The result is a list with one single-entry dict per text/* part,
        mapping the part's content type to its transfer-decoded payload.
        The ``decode`` argument is accepted but not used.
        """
        parsed = email.message_from_string(email_text)
        collected = []
        for text_part in list(typed_subpart_iterator(parsed, "text")):
            content_type = text_part.get_content_type()
            # Passing True as the second argument decodes base64 if necessary.
            collected.append({content_type: text_part.get_payload(None, True)})
        return collected
 def __init__(self, mlist, msg, msgdata):
     """Collect the candidate command lines found in *msg*.

     The RFC 2047 decoded Subject: header (reduced to ASCII) becomes the
     first command.  The lines of the first text/plain part follow, up to
     mm_cfg.DEFAULT_MAIL_COMMANDS_MAX_LINES; the remaining lines are kept
     in ``self.ignored``.
     """
     self.mlist = mlist
     self.msg = msg
     self.msgdata = msgdata
     # returnaddr is only set if the response is to go to someone other
     # than the address specified in the From: header (e.g. for the
     # password command).
     self.returnaddr = None
     self.commands = []
     self.results = []
     self.ignored = []
     self.lineno = 0
     self.subjcmdretried = 0
     self.respond = True
     # Note that Python 2.1's unicode() builtin doesn't call
     # obj.__unicode__(), hence the explicit method call below.
     subject = msg.get('subject', '')
     try:
         decoded = make_header(decode_header(subject)).__unicode__()
         # TK: Currently we don't allow 8bit or multibyte in mail command.
         # MAS: However, an l10n 'Re:' may contain non-ascii so ignore it.
         # The Subject: header is always processed first.
         self.commands.append(decoded.encode('us-ascii', 'ignore'))
     except (HeaderParseError, UnicodeError, LookupError):
         # We couldn't parse it so ignore the Subject header
         pass
     # Find the first text/plain part
     first_plain = None
     for first_plain in typed_subpart_iterator(msg, 'text', 'plain'):
         break
     found = first_plain is not None
     if not (found and first_plain is msg):
         # Either there was no text/plain part or we ignored some
         # non-text/plain parts.
         self.results.append(_('Ignoring non-text/plain MIME parts'))
     if not found:
         # E.g the outer Content-Type: was text/html
         return
     body = first_plain.get_payload(decode=True)
     # text/plain parts better have string payloads
     assert isinstance(body, (StringType, UnicodeType))
     # Use no more lines than specified
     limit = mm_cfg.DEFAULT_MAIL_COMMANDS_MAX_LINES
     lines = body.splitlines()
     self.commands.extend(lines[:limit])
     self.ignored.extend(lines[limit:])
Exemple #25
0
def get_message_body(message):
    """Return the text/plain content of an email message, stripped.

    For a multipart message every text/plain subpart is decoded (using the
    charset reported by get_message_charset, with undecodable bytes
    replaced) and the pieces are joined with newlines.  A non-multipart
    message yields its decoded payload when it is text/plain and an empty
    string otherwise.
    """
    # from http://ginstrom.com/scribbles/2007/11/19/parsing-multilingual-email-with-python/

    if not message.is_multipart():
        if message.get_content_type() != "text/plain":
            return ""
        single = unicode(message.get_payload(decode=True),
                         get_message_charset(message), "replace")
        return single.strip()

    # Collect the parts up front, then decode each one with its own charset,
    # falling back to the charset of the enclosing message.
    plain_parts = list(typed_subpart_iterator(message, 'text', 'plain'))
    decoded = []
    for piece in plain_parts:
        fallback = get_message_charset(message)
        codec = get_message_charset(piece, fallback)
        decoded.append(unicode(piece.get_payload(decode=True), codec,
                               "replace"))
    return u"\n".join(decoded).strip()
Exemple #26
0
def check(msg):
    """Collect the failed recipient addresses reported by a DSN message.

    Every header block of every message/delivery-status subpart is
    examined.  Returns Stop as soon as a block reports Action: delayed;
    otherwise returns the de-duplicated addresses taken from the
    Original-Recipient / Final-Recipient headers of the blocks whose Action
    is 'failed' or 'failure'.
    """
    failed = []
    for status_part in typed_subpart_iterator(msg, 'message',
                                              'delivery-status'):
        if not status_part.is_multipart():
            # Huh?
            continue
        # A message/delivery-status payload is a list of Message objects,
        # one per header block.
        for block in status_part.get_payload():
            # Original-Recipient is optional; Final-Recipient is mandatory
            # but may not exactly match an address on our list.  Some MTAs
            # use X-Actual-Recipient as a synonym for Original-Recipient,
            # but others use it for different purposes, so it is not read.
            action = block.get('action', '').lower()
            if action == 'delayed':
                return Stop
            if action != 'failed' and action != 'failure':
                # Some non-permanent failure, so ignore this block
                continue
            candidates = []
            for header in ('original-recipient', 'final-recipient'):
                keys = [key for key, _value in block.get_params([], header)]
                others = [key for key in keys if key.lower() != 'rfc822']
                candidates.extend(others)
                if len(others) != len(keys):
                    # An rfc822 address-type was present.  Note that the
                    # parameters should already be unquoted.
                    failed.extend(candidates)
                    break
    # Uniquify
    unique = {}
    for raw in failed:
        if raw is None:
            continue
        unique[parseaddr(raw)[1]] = True
    return unique.keys()
def check(msg):
    """Collect the failed recipient addresses reported by a DSN message.

    Every header block of every message/delivery-status subpart is
    examined.  Returns Stop as soon as a block reports Action: delayed;
    otherwise returns the de-duplicated addresses taken from the
    Original-Recipient / Final-Recipient headers of the blocks whose Action
    is 'failed' or 'failure'.
    """
    failed = []
    for status_part in typed_subpart_iterator(msg, 'message',
                                              'delivery-status'):
        if not status_part.is_multipart():
            # Huh?
            continue
        # A message/delivery-status payload is a list of Message objects,
        # one per header block.
        for block in status_part.get_payload():
            # Original-Recipient is optional; Final-Recipient is mandatory
            # but may not exactly match an address on our list.  Some MTAs
            # use X-Actual-Recipient as a synonym for Original-Recipient,
            # but others use it for different purposes, so it is not read.
            action = block.get('action', '').lower()
            if action == 'delayed':
                return Stop
            if action != 'failed' and action != 'failure':
                # Some non-permanent failure, so ignore this block
                continue
            candidates = []
            for header in ('original-recipient', 'final-recipient'):
                keys = [key for key, _value in block.get_params([], header)]
                others = [key for key in keys if key.lower() != 'rfc822']
                candidates.extend(others)
                if len(others) != len(keys):
                    # An rfc822 address-type was present.  Note that the
                    # parameters should already be unquoted.
                    failed.extend(candidates)
                    break
    # Uniquify
    unique = {}
    for raw in failed:
        if raw is None:
            continue
        unique[parseaddr(raw)[1]] = True
    return unique.keys()
    def __get_body(self, msg, charset):
        """Split a message into plain-text bodies and attached patches.

        Returns a (body, patches) pair of lists holding to_unicode()
        results.  A non-multipart message contributes its whole payload,
        converted with the given charset, as the only body.  For a
        multipart message each text/plain part goes to body and each
        text/x-patch or text/x-diff part goes to patches, converted with
        the part's own declared charset.
        """
        # Non multipart messages should be straightforward
        if not msg.is_multipart():
            return [to_unicode(msg.get_payload(decode=True), charset)], []

        body = []
        patches = []
        # Result list that each recognised text/* subtype is collected into.
        destinations = {'plain': body, 'x-patch': patches, 'x-diff': patches}
        for text_part in list(typed_subpart_iterator(msg, 'text')):
            own_charset = text_part.get_content_charset()
            raw = text_part.get_payload(decode=True)
            target = destinations.get(text_part.get_content_subtype())
            if target is not None:
                target.append(to_unicode(raw, own_charset))

        return body, patches
    def __get_body(self, msg, charset):
        """Split a message into plain-text bodies and attached patches.

        Returns a (body, patches) pair of lists holding to_unicode()
        results.  A non-multipart message contributes its whole payload,
        converted with the given charset, as the only body.  For a
        multipart message each text/plain part goes to body and each
        text/x-patch or text/x-diff part goes to patches, converted with
        the part's own declared charset.
        """
        # Non multipart messages should be straightforward
        if not msg.is_multipart():
            return [to_unicode(msg.get_payload(decode=True), charset)], []

        body = []
        patches = []
        # Result list that each recognised text/* subtype is collected into.
        destinations = {'plain': body, 'x-patch': patches, 'x-diff': patches}
        for text_part in list(typed_subpart_iterator(msg, 'text')):
            own_charset = text_part.get_content_charset()
            raw = text_part.get_payload(decode=True)
            target = destinations.get(text_part.get_content_subtype())
            if target is not None:
                target.append(to_unicode(raw, own_charset))

        return body, patches
def unicode_email_body(email):
    """Return the decoded text of an email message.

    For a multipart message the payloads of all text/plain parts are
    decoded and concatenated; otherwise the message's own payload is
    decoded.  Each payload is decoded with its declared charset, or with
    iso-8859-1 when none is declared.
    """
    # If no encoding was specified we try iso-8859-1.
    fallback = "iso-8859-1"

    if not email.is_multipart():
        codec = email.get_content_charset() or fallback
        return unicode(email.get_payload(decode=True), codec)

    pieces = []
    for text_part in typed_subpart_iterator(email, "text", "plain"):
        codec = text_part.get_content_charset() or fallback
        pieces.append(unicode(text_part.get_payload(decode=True), codec))
    return "".join(pieces)
def unicode_email_body(email):
    """Return the decoded text of an email message.

    For a multipart message the payloads of all text/plain parts are
    decoded and concatenated; otherwise the message's own payload is
    decoded.  Each payload is decoded with its declared charset, or with
    iso-8859-1 when none is declared.
    """
    # If no encoding was specified we try iso-8859-1.
    fallback = "iso-8859-1"

    if not email.is_multipart():
        codec = email.get_content_charset() or fallback
        return unicode(email.get_payload(decode=True), codec)

    pieces = []
    for text_part in typed_subpart_iterator(email, "text", "plain"):
        codec = text_part.get_content_charset() or fallback
        pieces.append(unicode(text_part.get_payload(decode=True), codec))
    return "".join(pieces)
Exemple #32
0
def to_plaintext(msg):
    """Convert every text/html subpart of msg to text/plain, in place.

    Each HTML payload is written to a temporary file, the command configured
    in mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND is run on that file, and whatever
    the command prints replaces the subpart's payload.  A non-zero exit
    status of the command is logged but the output is still used.

    Returns 1 if at least one subpart was converted, otherwise 0.
    """
    changedp = 0
    for subpart in typed_subpart_iterator(msg, 'text', 'html'):
        # mkstemp() creates the file atomically; mktemp() only hands back a
        # name, leaving a window in which someone else can create the file.
        fd, filename = tempfile.mkstemp('.html')
        try:
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(subpart.get_payload(decode=1))
            finally:
                # Close (and thereby flush) even when the write fails, and
                # always before the converter reads the file.
                fp.close()
            cmd = os.popen(mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND %
                           {'filename': filename})
            plaintext = cmd.read()
            rtn = cmd.close()
            if rtn:
                syslog('error', 'HTML->text/plain error: %s', rtn)
        finally:
            try:
                os.unlink(filename)
            except OSError as e:
                # The file already being gone is fine; anything else is not.
                if e.errno != errno.ENOENT:
                    raise
        # Now replace the payload of the subpart and twiddle the Content-Type:
        del subpart['content-transfer-encoding']
        subpart.set_payload(plaintext)
        subpart.set_type('text/plain')
        changedp = 1
    return changedp
Exemple #33
0
def to_plaintext(msg):
    """Convert every text/html subpart of msg to text/plain, in place.

    Each HTML payload is written to a temporary file, the command configured
    in mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND is run on that file, and whatever
    the command prints replaces the subpart's payload.  A non-zero exit
    status of the command is logged but the output is still used.

    Returns 1 if at least one subpart was converted, otherwise 0.
    """
    changedp = 0
    for subpart in typed_subpart_iterator(msg, 'text', 'html'):
        # mkstemp() creates the file atomically; mktemp() only hands back a
        # name, leaving a window in which someone else can create the file.
        fd, filename = tempfile.mkstemp('.html')
        try:
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(subpart.get_payload(decode=1))
            finally:
                # Close (and thereby flush) even when the write fails, and
                # always before the converter reads the file.
                fp.close()
            cmd = os.popen(mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND %
                           {'filename': filename})
            plaintext = cmd.read()
            rtn = cmd.close()
            if rtn:
                syslog('error', 'HTML->text/plain error: %s', rtn)
        finally:
            try:
                os.unlink(filename)
            except OSError as e:
                # The file already being gone is fine; anything else is not.
                if e.errno != errno.ENOENT:
                    raise
        # Now replace the payload of the subpart and twiddle the Content-Type:
        del subpart['content-transfer-encoding']
        subpart.set_payload(plaintext)
        subpart.set_type('text/plain')
        changedp = 1
    return changedp
Exemple #34
0
def process(mlist, msg, msgdata):
    """Look for an approval password in msg and mark the message approved.

    The password is taken from an Approved / Approve / X-Approved /
    X-Approve header or, when no such header exists, from the first
    non-blank line of the first text/plain part if that line looks like one
    of those headers.  A password line found in the body is removed from
    that part, and the same "name: password" pattern is removed from every
    text/* part.

    When the password authenticates via mlist.Authenticate() as list
    poster, moderator or admin, msgdata['approved'] and
    msgdata['adminapproved'] are set to 1.

    Raises Errors.RejectMessage when the password pattern is only visible
    in a text part after stripping HTML tags (so it cannot be removed), and
    Errors.LoopError when the list's address already appears in an
    X-BeenThere header.
    """
    # Short circuits
    # Do not short circuit. The problem is SpamDetect comes before Approve.
    # Suppose a message with an Approved: header is held by SpamDetect (or
    # any other handler that might come before Approve) and then approved
    # by a moderator. When the approved message reaches Approve in the
    # pipeline, we still need to remove the Approved: (pseudo-)header, so
    # we can't short circuit.
    #if msgdata.get('approved'):
        # Digests, Usenet postings, and some other messages come pre-approved.
        # TBD: we may want to further filter Usenet messages, so the test
        # above may not be entirely correct.
        #return
    # See if the message has an Approved or Approve header with a valid
    # list-moderator, list-admin.  Also look at the first non-whitespace line
    # in the file to see if it looks like an Approved header.  We are
    # specifically /not/ allowing the site admins password to work here
    # because we want to discourage the practice of sending the site admin
    # password through email in the clear.
    # A fresh list object serves as a unique "not found" sentinel that can
    # be told apart from any real header value with an identity test.
    missing = []
    for hdr in ('approved', 'approve', 'x-approved', 'x-approve'):
        passwd = msg.get(hdr, missing)
        if passwd is not missing:
            break
    if passwd is missing:
        # Find the first text/plain part in the message
        part = None
        stripped = False
        for part in typed_subpart_iterator(msg, 'text', 'plain'):
            break
        # XXX I'm not entirely sure why, but it is possible for the payload of
        # the part to be None, and you can't splitlines() on None.
        if part is not None and part.get_payload() is not None:
            lines = part.get_payload(decode=True).splitlines()
            line = ''
            # Stop at the first non-blank line; lineno remembers its index so
            # the line can be deleted below.
            for lineno, line in zip(range(len(lines)), lines):
                if line.strip():
                    break
            i = line.find(':')
            if i >= 0:
                name = line[:i]
                value = line[i+1:]
                if name.lower() in ('approve',
                                    'approved',
                                    'x-approve',
                                    'x-approved',
                                    ):
                    passwd = value.lstrip()
                    # Now strip the first line from the payload so the
                    # password doesn't leak.
                    del lines[lineno]
                    reset_payload(part, NL.join(lines))
                    stripped = True
        if stripped:
            # MAS: Bug 1181161 - Now try all the text parts in case it's
            # multipart/alternative with the approved line in HTML or other
            # text part.  We make a pattern from the Approved line and delete
            # it from all text/* parts in which we find it.  It would be
            # better to just iterate forward, but email compatibility for pre
            # Python 2.2 returns a list, not a true iterator.  Also, there
            # are pathological MUAs that put the HTML part first.
            #
            # This will process all the multipart/alternative parts in the
            # message as well as all other text parts.  We shouldn't find the
            # pattern outside the mp/a parts, but if we do, it is probably
            # best to delete it anyway as it does contain the password.
            #
            # Make a pattern to delete.  We can't just delete a line because
            # line of HTML or other fancy text may include additional message
            # text.  This pattern works with HTML.  It may not work with rtf
            # or whatever else is possible.
            #
            # If we don't find the pattern in the decoded part, but we do
            # find it after stripping HTML tags, we don't know how to remove
            # it, so we just reject the post.
            #
            # name needs no re.escape(): it matched one of the four literal
            # header names above, which contain only letters and '-'.
            pattern = name + ':(\xA0|\s|&nbsp;)*' + re.escape(passwd)
            for part in typed_subpart_iterator(msg, 'text'):
                if part is not None and part.get_payload() is not None:
                    # Despite the name, lines is the whole decoded payload
                    # as a single string here.
                    lines = part.get_payload(decode=True)
                    if re.search(pattern, lines):
                        reset_payload(part, re.sub(pattern, '', lines))
                    elif re.search(pattern, re.sub('(?s)<.*?>', '', lines)):
                        raise Errors.RejectMessage, REJECT
    if passwd is not missing and mlist.Authenticate((mm_cfg.AuthListPoster,
                                                     mm_cfg.AuthListModerator,
                                                     mm_cfg.AuthListAdmin),
                                                    passwd):
        # BAW: should we definitely deny if the password exists but does not
        # match?  For now we'll let it percolate up for further determination.
        msgdata['approved'] = 1
        # Used by the Emergency module
        msgdata['adminapproved'] = 1
    # has this message already been posted to this list?
    beentheres = [s.strip().lower() for s in msg.get_all('x-beenthere', [])]
    if mlist.GetListEmail().lower() in beentheres:
        raise Errors.LoopError
Exemple #35
0
    def replace(self, find, replace, trash_folder, callback=None):
        """Performs a body-wide string search and replace

        Note that this search-and-replace is pretty dumb, and will fail
        in, for example, HTML messages where HTML tags would alter the search
        string.

        Every text/plain and text/html part of self.raw is decoded, has the
        replacement applied, and is re-encoded with the part's original
        charset and content transfer encoding.  The message is then saved
        through self.save.

        Args:
            find         -- the search term to look for as a string, or a tuple
                            of items to replace with corresponding items in the
                            replace tuple
            replace      -- the string to replace instances of the "find" term
                            with, or a tuple of terms to replace the
                            corresponding strings in the find tuple
            trash_folder -- the name of the folder / label that is, in the
                            current account, the trash container
            callback     -- optional; when given, the result is routed
                            through it by the _cmd / _cmd_cb helpers
                            (NOTE(review): presumably for asynchronous use
                            -- confirm against those helpers)

        Returns:
            True on success, and in all other instances an error object
        """
        def _set_content_transfer_encoding(part, encoding):
            # Drop any existing header first so the part never carries two.
            # Only a missing header is tolerated; other errors propagate.
            try:
                del part['Content-Transfer-Encoding']
            except KeyError:
                pass
            part.add_header('Content-Transfer-Encoding', encoding)

        valid_content_types = ('plain', 'html')

        for valid_type in valid_content_types:

            for part in typed_subpart_iterator(self.raw, 'text', valid_type):

                section_encoding = part['Content-Transfer-Encoding']

                # If the message section doesn't advertise an encoding,
                # then default to quoted printable. Otherwise the module
                # will default to base64, which can cause problems
                if not section_encoding:
                    section_encoding = "quoted-printable"
                else:
                    section_encoding = section_encoding.lower()

                section_charset = message_part_charset(part, self.raw)
                new_payload_section = utf8_encode_message_part(
                    part, self.raw, section_charset)

                if is_encoding_error(new_payload_section):
                    self.encoding_error = new_payload_section
                    return _cmd(callback, self.encoding_error)

                if isinstance(find, (tuple, list)):
                    # Pair each search term with the replacement at the same
                    # index; a too-short replace sequence raises IndexError.
                    for i, term in enumerate(find):
                        new_payload_section = new_payload_section.replace(
                            term, replace[i])
                else:
                    new_payload_section = new_payload_section.replace(
                        find, replace)

                # NOTE(review): _orig_charset and _normalized are not set in
                # this method; presumably utf8_encode_message_part attaches
                # them to the part -- confirm.
                new_payload_section = new_payload_section.encode(
                    part._orig_charset, errors="replace")

                if section_encoding == "quoted-printable":
                    new_payload_section = encodestring(new_payload_section,
                                                       quotetabs=0)
                    part.set_payload(new_payload_section, part._orig_charset)
                    _set_content_transfer_encoding(part, "quoted-printable")
                elif section_encoding == "base64":
                    part.set_payload(new_payload_section, part._orig_charset)
                    ENC.encode_base64(part)
                    _set_content_transfer_encoding(part, "base64")
                elif section_encoding in ('7bit', '8bit'):
                    part.set_payload(new_payload_section, part._orig_charset)
                    ENC.encode_7or8bit(part)
                    _set_content_transfer_encoding(part, section_encoding)
                elif section_encoding == "binary":
                    part.set_payload(new_payload_section, part._orig_charset)
                    # The helper replaces the header, so assigning it
                    # separately beforehand is unnecessary.
                    _set_content_transfer_encoding(part, 'binary')
                # Any other transfer encoding leaves the payload untouched.

                del part._normalized
                del part._orig_charset

        def _on_save(was_success):
            return _cmd(callback, was_success)

        return _cmd_cb(self.save, _on_save, bool(callback), trash_folder)
Exemple #36
0
def process(mlist, msg, msgdata):
    """Look for an approval password in msg and mark the message approved.

    The password is taken from an Approved / Approve / X-Approved /
    X-Approve header or, when no such header exists, from the first
    non-blank line of the first text/plain part if that line looks like one
    of those headers.  A password line found in the body is removed from
    that part, and the same "name: password" pattern is removed from every
    text/* part.

    When the password authenticates via mlist.Authenticate() as list
    poster, moderator or admin, msgdata['approved'] and
    msgdata['adminapproved'] are set to 1.

    Raises Errors.RejectMessage when the password pattern is only visible
    in a text part after stripping HTML tags (so it cannot be removed), and
    Errors.LoopError when the list's address already appears in an
    X-BeenThere header.
    """
    # Short circuits
    # Do not short circuit. The problem is SpamDetect comes before Approve.
    # Suppose a message with an Approved: header is held by SpamDetect (or
    # any other handler that might come before Approve) and then approved
    # by a moderator. When the approved message reaches Approve in the
    # pipeline, we still need to remove the Approved: (pseudo-)header, so
    # we can't short circuit.
    #if msgdata.get('approved'):
    # Digests, Usenet postings, and some other messages come pre-approved.
    # TBD: we may want to further filter Usenet messages, so the test
    # above may not be entirely correct.
    #return
    # See if the message has an Approved or Approve header with a valid
    # list-moderator, list-admin.  Also look at the first non-whitespace line
    # in the file to see if it looks like an Approved header.  We are
    # specifically /not/ allowing the site admins password to work here
    # because we want to discourage the practice of sending the site admin
    # password through email in the clear.
    # A fresh list object serves as a unique "not found" sentinel that can
    # be told apart from any real header value with an identity test.
    missing = []
    for hdr in ('approved', 'approve', 'x-approved', 'x-approve'):
        passwd = msg.get(hdr, missing)
        if passwd is not missing:
            break
    if passwd is missing:
        # Find the first text/plain part in the message
        part = None
        stripped = False
        for part in typed_subpart_iterator(msg, 'text', 'plain'):
            break
        # XXX I'm not entirely sure why, but it is possible for the payload of
        # the part to be None, and you can't splitlines() on None.
        if part is not None and part.get_payload() is not None:
            lines = part.get_payload(decode=True).splitlines()
            line = ''
            # Stop at the first non-blank line; lineno remembers its index so
            # the line can be deleted below.
            for lineno, line in zip(range(len(lines)), lines):
                if line.strip():
                    break
            i = line.find(':')
            if i >= 0:
                name = line[:i]
                value = line[i + 1:]
                if name.lower() in (
                        'approve',
                        'approved',
                        'x-approve',
                        'x-approved',
                ):
                    passwd = value.lstrip()
                    # Now strip the first line from the payload so the
                    # password doesn't leak.
                    del lines[lineno]
                    reset_payload(part, NL.join(lines))
                    stripped = True
        if stripped:
            # MAS: Bug 1181161 - Now try all the text parts in case it's
            # multipart/alternative with the approved line in HTML or other
            # text part.  We make a pattern from the Approved line and delete
            # it from all text/* parts in which we find it.  It would be
            # better to just iterate forward, but email compatibility for pre
            # Python 2.2 returns a list, not a true iterator.  Also, there
            # are pathological MUAs that put the HTML part first.
            #
            # This will process all the multipart/alternative parts in the
            # message as well as all other text parts.  We shouldn't find the
            # pattern outside the mp/a parts, but if we do, it is probably
            # best to delete it anyway as it does contain the password.
            #
            # Make a pattern to delete.  We can't just delete a line because
            # line of HTML or other fancy text may include additional message
            # text.  This pattern works with HTML.  It may not work with rtf
            # or whatever else is possible.
            #
            # If we don't find the pattern in the decoded part, but we do
            # find it after stripping HTML tags, we don't know how to remove
            # it, so we just reject the post.
            #
            # name needs no re.escape(): it matched one of the four literal
            # header names above, which contain only letters and '-'.
            pattern = name + ':(\xA0|\s|&nbsp;)*' + re.escape(passwd)
            for part in typed_subpart_iterator(msg, 'text'):
                if part is not None and part.get_payload() is not None:
                    # Despite the name, lines is the whole decoded payload
                    # as a single string here.
                    lines = part.get_payload(decode=True)
                    if re.search(pattern, lines):
                        reset_payload(part, re.sub(pattern, '', lines))
                    elif re.search(pattern, re.sub('(?s)<.*?>', '', lines)):
                        raise Errors.RejectMessage, REJECT
    if passwd is not missing and mlist.Authenticate(
        (mm_cfg.AuthListPoster, mm_cfg.AuthListModerator,
         mm_cfg.AuthListAdmin), passwd):
        # BAW: should we definitely deny if the password exists but does not
        # match?  For now we'll let it percolate up for further determination.
        msgdata['approved'] = 1
        # Used by the Emergency module
        msgdata['adminapproved'] = 1
    # has this message already been posted to this list?
    beentheres = [s.strip().lower() for s in msg.get_all('x-beenthere', [])]
    if mlist.GetListEmail().lower() in beentheres:
        raise Errors.LoopError