def scrub_msg822(self, part):
        # submessage
        submsg = part.get_payload(0)
        omask = os.umask(002)
        try:
            url = save_attachment(self.mlist, part, self.dir)
        finally:
            os.umask(omask)
        subject = submsg.get('subject', _('no subject'))
        subject = Utils.oneline(subject, self.lcset)
        date = submsg.get('date', _('no date'))
        who = submsg.get('from', _('unknown sender'))
        who = Utils.oneline(who, self.lcset)
        size = len(str(submsg))
        self.msgtexts.append(unicode(_("""\
An embedded message was scrubbed...
From: %(who)s
Subject: %(subject)s
Date: %(date)s
Size: %(size)s
URL: %(url)s
"""), self.lcset))
        # Replace this part because subparts should not be walk()-ed.
        del part['content-type']
        part.set_payload('blah blah', 'us-ascii')
Beispiel #2
0
def process(mlist, msg, msgdata):
    """Send the poster an acknowledgement of their post, if they want one.

    Nothing is sent when the sender is not a member of mlist or has not
    enabled the AcknowledgePosts option.  Otherwise the 'postack.txt'
    template is filled in and mailed to the sender as a UserNotification.

    NOTE(review): the %(realname)s placeholder below is presumably filled
    in by _() from the local variable `realname` -- keep that name.
    """
    # Extract the sender's address and find them in the user database
    sender = msgdata.get('original_sender', msg.get_sender())
    try:
        wants_ack = mlist.getMemberOption(sender, mm_cfg.AcknowledgePosts)
    except Errors.NotAMemberError:
        return
    if not wants_ack:
        return
    # Okay, they want acknowledgement of their post.  Give them their original
    # subject.  BAW: do we want to use the decoded header?
    origsubj = msgdata.get('origsubj', msg.get('subject', _('(no subject)')))
    # Get the user's preferred language
    lang = msgdata.get('lang', mlist.getMemberLanguage(sender))
    realname = mlist.real_name
    # Values substituted into the acknowledgement template
    substitutions = {
        'subject': Utils.oneline(origsubj, Utils.GetCharSet(lang)),
        'listname': realname,
        'listinfo_url': mlist.GetScriptURL('listinfo', absolute=1),
        'optionsurl': mlist.GetOptionsURL(sender, absolute=1),
    }
    text = Utils.maketext('postack.txt', substitutions,
                          lang=lang, mlist=mlist, raw=1)
    # Craft the outgoing message, with all headers and attributes
    # necessary for general delivery.  Then enqueue it to the outgoing
    # queue.
    subject = _('%(realname)s post acknowledgement')
    usermsg = Message.UserNotification(
        sender, mlist.GetBouncesEmail(), subject, text, lang)
    usermsg.send(mlist)
Beispiel #3
0
def process(mlist, msg, msgdata):
    """Acknowledge a post to its sender when the sender opted in.

    Returns without sending anything if the sender is not a list member
    (Errors.NotAMemberError) or has the AcknowledgePosts option off.

    NOTE(review): `realname` is presumably picked up by _() to fill the
    %(realname)s placeholder, so the local must keep that name.
    """
    # The original sender recorded in msgdata takes precedence over the
    # sender derived from the message itself.
    sender = msgdata.get('original_sender', msg.get_sender())
    try:
        if not mlist.getMemberOption(sender, mm_cfg.AcknowledgePosts):
            # Member has not asked for acknowledgements.
            return
    except Errors.NotAMemberError:
        return
    # Give them their original subject.
    # BAW: do we want to use the decoded header?
    origsubj = msgdata.get('origsubj', msg.get('subject', _('(no subject)')))
    # The acknowledgement is written in the user's preferred language
    lang = msgdata.get('lang', mlist.getMemberLanguage(sender))
    realname = mlist.real_name
    text = Utils.maketext(
        'postack.txt',
        dict(subject=Utils.oneline(origsubj, Utils.GetCharSet(lang)),
             listname=realname,
             listinfo_url=mlist.GetScriptURL('listinfo', absolute=1),
             optionsurl=mlist.GetOptionsURL(sender, absolute=1)),
        lang=lang, mlist=mlist, raw=1)
    # Build the notification with the bounces address as its sender and
    # hand it to the list for delivery.
    subject = _('%(realname)s post acknowledgement')
    Message.UserNotification(sender, mlist.GetBouncesEmail(),
                             subject, text, lang).send(mlist)
Beispiel #4
0
def quote(s, is_header=False):
    """Return s with control characters removed and markup escaped.

    When is_header is true, s is first passed through Utils.oneline() with
    the 'iso-8859-1' charset.  Characters \\x00-\\x08 and \\x0B-\\x1f are
    dropped, then '&', '>' and '<' are replaced by their entities and the
    result is passed through Utils.uquote().
    """
    text = Utils.oneline(s, 'iso-8859-1') if is_header else s
    # Drop the control characters in one pass.
    text = re.sub('[\x00-\x08\x0B-\x1f]+', '', text)
    # '&' has to be escaped first so the entities added afterwards are not
    # escaped a second time.
    for raw, entity in (('&', '&amp;'), ('>', '&gt;'), ('<', '&lt;')):
        text = text.replace(raw, entity)
    return Utils.uquote(text)
Beispiel #5
0
def process(mlist, msg, msgdata):
    """Rewrite the From: header of msg as configured in mm_cfg.REWRITE_FROM.

    REWRITE_FROM maps a list's internal name (or '*' as a fallback) to a
    dict that may contain 'from_name' and 'from_address' format strings and
    a 'save_original' header name.  The format strings are interpolated
    with list attributes and the parsed original From: address.  Nothing
    happens when REWRITE_FROM is not defined or has no entry for this list.
    """
    try:
        confs_by_list = mm_cfg.REWRITE_FROM
    except AttributeError:
        # The feature is not configured at all.
        return

    # A list-specific entry wins over the '*' wildcard entry.
    for key in (mlist.internal_name(), '*'):
        if key in confs_by_list:
            conf = confs_by_list[key]
            break
    else:
        return

    from_name_fmt = conf.get('from_name', '%(from_name)s')
    from_address_fmt = conf.get('from_address', '%(from_address)s')
    save_original = conf.get('save_original')

    d = SafeDict({
        'list_real_name': mlist.real_name,
        'list_name': mlist.internal_name(),
        'list_address': mlist.GetListEmail(),
        'list_domain': mlist.host_name,
        'list_desc': mlist.description,
        'list_info': mlist.info,
    })

    lcset = Utils.GetCharSet(mlist.preferred_language)

    from_name, from_address = parseaddr(msg.get('From'))
    d['from_address'] = from_address
    # Split into local part and domain; an address without '@' is treated
    # as a bare local part.
    pieces = from_address.split('@', 1)
    if len(pieces) == 2:
        d['from_local'], d['from_domain'] = pieces
    else:
        d['from_local'] = from_address
        d['from_domain'] = ''

    # Fall back to the local part when the header carries no display name.
    if from_name == '':
        d['from_name'] = d['from_local']
    else:
        d['from_name'] = Utils.oneline(from_name, lcset)

    # Prefer the name stored for the member; use from_name when the sender
    # is not a member, has no stored name, or the name cannot be encoded.
    try:
        membername = mlist.getMemberName(from_address) or None
    except Errors.NotAMemberError:
        d['from_membername'] = d['from_name']
    else:
        try:
            d['from_membername'] = membername.encode(lcset)
        except (AttributeError, UnicodeError):
            d['from_membername'] = d['from_name']

    from_name = from_name_fmt % d
    from_address = from_address_fmt % d

    # Optionally keep the original From: value under another header name.
    if save_original:
        msg[save_original] = msg['From']
    del msg['From']
    msg['From'] = formataddr((from_name, from_address))
    def scrub_any(self, part):
        # Images and MS Office files and all
        payload = part.get_payload(decode=True)
        ctype = part.get_content_type()
        # XXX email 2.5 special care is omitted.
        size = len(payload)
        omask = os.umask(002)
        try:
            url = save_attachment(self.mlist, part, self.dir)
        finally:
            os.umask(omask)
        desc = part.get('content-description', _('not available'))
        desc = Utils.oneline(desc, self.lcset)
        filename = part.get_filename(_('not available'))
        filename = Utils.oneline(filename, self.lcset)
        self.msgtexts.append(unicode(_("""\
A non-text attachment was scrubbed...
Name: %(filename)s
Type: %(ctype)s
Size: %(size)d bytes
Desc: %(desc)s
URL: %(url)s
"""), self.lcset))
    def scrub_text(self, part):
        # Plain text scrubber.
        omask = os.umask(002)
        try:
            url = save_attachment(self.mlist, part, self.dir)
        finally:
            os.umask(omask)
        filename = part.get_filename(_('not available'))
        filename = Utils.oneline(filename, self.lcset)
        self.msgtexts.append(unicode(_("""\
An embedded and charset-unspecified text was scrubbed...
Name: %(filename)s
URL: %(url)s
"""), self.lcset))
Beispiel #8
0
def quote(s, is_header=False):
    """Return s with illegal XML characters removed and markup escaped.

    When is_header is true, s is first passed through Utils.oneline() with
    the 'utf-8' charset.  The text is decoded from UTF-8 when possible so
    that Utils.uquote() can escape multibyte characters correctly; byte
    strings that are not valid UTF-8 are processed undecoded.  Characters
    \\x00-\\x08 and \\x0B-\\x1f are removed, then '&', '>' and '<' are
    replaced by their entities.
    """
    if is_header:
        h = Utils.oneline(s, 'utf-8')
    else:
        h = s

    # Remove illegal XML characters
    # Try to decode UTF-8, so that Utils.uquote can escape multibyte characters
    # correctly.
    try:
        if isinstance(h, unicode):
            # Already decoded.  Calling .decode() on a unicode object first
            # encodes it implicitly as ASCII, which raises UnicodeEncodeError
            # (not caught below) as soon as it holds a non-ASCII character.
            hclean = h
        else:
            hclean = h.decode('utf-8')
        hclean = u''.join(re.split(u'[\x00-\x08\x0B-\x1f]+', hclean))
    except UnicodeDecodeError:
        # Not valid UTF-8: strip the control characters from the raw bytes.
        hclean = ''.join(re.split('[\x00-\x08\x0B-\x1f]+', h))
    return Utils.uquote(hclean.replace('&', '&amp;').replace('>', '&gt;').replace('<', '&lt;'))
Beispiel #9
0
def quote(s, is_header=False):
    """Strip illegal XML characters from s and escape it for markup.

    Header values (is_header true) are first flattened with
    Utils.oneline(s, 'utf-8').  Byte strings are decoded from UTF-8 when
    they are valid UTF-8, so that Utils.uquote() sees whole characters
    rather than individual bytes; otherwise the raw bytes are used.
    Characters \\x00-\\x08 and \\x0B-\\x1f are dropped and '&', '>', '<'
    become entities before the result is handed to Utils.uquote().
    """
    if is_header:
        h = Utils.oneline(s, 'utf-8')
    else:
        h = s

    # Remove illegal XML characters
    # Try to decode UTF-8, so that Utils.uquote can escape multibyte characters
    # correctly.
    try:
        if isinstance(h, unicode):
            # Text that is already unicode must not be decoded again: in
            # Python 2 that triggers an implicit ASCII encode and an
            # uncaught UnicodeEncodeError for any non-ASCII character.
            hclean = h
        else:
            hclean = h.decode('utf-8')
        hclean = u''.join(re.split(u'[\x00-\x08\x0B-\x1f]+', hclean))
    except UnicodeDecodeError:
        # The bytes are not UTF-8; clean them without decoding.
        hclean = ''.join(re.split('[\x00-\x08\x0B-\x1f]+', h))
    return Utils.uquote(hclean.replace('&', '&amp;').replace('>', '&gt;').replace('<', '&lt;'))
Beispiel #10
0
def process(mlist, msg, msgdata):
    """Add the headers configured in mm_cfg.ADD_HEADERS to msg.

    ADD_HEADERS maps a list's internal name (or '*' as a fallback) to a
    dict of header name -> format string.  Each format string is
    interpolated with list attributes and the parsed From: address, and
    the result is passed to change_header() with delete=False.  Nothing
    happens when ADD_HEADERS is not defined or has no entry for this list.
    """
    try:
        confs_by_list = mm_cfg.ADD_HEADERS
    except AttributeError:
        # The feature is not configured at all.
        return

    # Look for a list-specific entry first, then the '*' wildcard.
    for key in (mlist.internal_name(), '*'):
        if key in confs_by_list:
            conf = confs_by_list[key]
            break
    else:
        return

    d = SafeDict({
        'list_real_name': mlist.real_name,
        'list_name': mlist.internal_name(),
        'list_address': mlist.GetListEmail(),
        'list_domain': mlist.host_name,
        'list_desc': mlist.description,
        'list_info': mlist.info,
        'post_id': mlist.post_id,
    })

    lcset = Utils.GetCharSet(mlist.preferred_language)
    d['from_header'] = msg.get('From')
    from_name, from_address = parseaddr(d['from_header'])
    d['from_address'] = from_address
    # An address without '@' is treated as a bare local part.
    pieces = from_address.split('@', 1)
    if len(pieces) == 2:
        d['from_local'], d['from_domain'] = pieces
    else:
        d['from_local'] = from_address
        d['from_domain'] = ''
    # Without a display name, the local part stands in for it.
    if from_name == '':
        d['from_name'] = d['from_local']
    else:
        d['from_name'] = Utils.oneline(from_name, lcset)
    # Use the stored member name when there is an encodable one, otherwise
    # fall back to from_name.
    try:
        membername = mlist.getMemberName(from_address) or None
    except Errors.NotAMemberError:
        d['from_membername'] = d['from_name']
    else:
        try:
            d['from_membername'] = membername.encode(lcset)
        except (AttributeError, UnicodeError):
            d['from_membername'] = d['from_name']

    for name, value_fmt in conf.items():
        change_header(name, value_fmt % d, mlist, msg, msgdata, delete=False)
Beispiel #11
0
 def BounceMessage(self, msg, msgdata, e=None):
     """Bounce msg back to its sender.

     The reply is a multipart/mixed UserNotification sent from the list
     owner address.  It carries a text notice (taken from e.notice() when
     an exception e is given) followed by the original message.
     """
     sender = msg.get_sender()
     lcset = Utils.GetCharSet(self.preferred_language)
     subject = Utils.oneline(msg.get("subject", _("(no subject)")), lcset)
     if e is not None:
         notice = _(e.notice())
     else:
         notice = _("[No bounce details are available]")
     # Currently we always craft bounces as MIME messages.
     bmsg = Message.UserNotification(
         sender, self.GetOwnerEmail(), subject, lang=self.preferred_language)
     # BAW: Be sure you set the type before trying to attach, or you'll get
     # a MultipartConversionError.
     bmsg.set_type("multipart/mixed")
     bmsg.attach(MIMEText(notice, _charset=lcset))
     bmsg.attach(MIMEMessage(msg))
     bmsg.send(self)
Beispiel #12
0
 def BounceMessage(self, msg, msgdata, e=None):
     """Return msg to its sender together with an explanation.

     e, when given, supplies the explanation through e.notice(); otherwise
     a generic notice is used.  The bounce is built as a multipart/mixed
     UserNotification from the list owner address, with the notice as the
     first part and the original message attached after it.
     """
     sender = msg.get_sender()
     list_charset = Utils.GetCharSet(self.preferred_language)
     subject = msg.get('subject', _('(no subject)'))
     subject = Utils.oneline(subject, list_charset)
     notice = (_('[No bounce details are available]') if e is None
               else _(e.notice()))
     # Currently we always craft bounces as MIME messages.
     bmsg = Message.UserNotification(sender,
                                     self.GetOwnerEmail(),
                                     subject,
                                     lang=self.preferred_language)
     # BAW: Be sure you set the type before trying to attach, or you'll get
     # a MultipartConversionError.
     bmsg.set_type('multipart/mixed')
     for attachment in (MIMEText(notice, _charset=list_charset),
                        MIMEMessage(msg)):
         bmsg.attach(attachment)
     bmsg.send(self)
Beispiel #13
0
     mcset = 'us-ascii'
 lcset = Utils.GetCharSet(mlist.preferred_language)
 if mcset <> lcset:
     try:
         body = unicode(body, mcset).encode(lcset, 'replace')
     except (LookupError, UnicodeError, ValueError):
         pass
 hdrtxt = NL.join(['%s: %s' % (k, v) for k, v in msg.items()])
 hdrtxt = Utils.websafe(hdrtxt)
 # Okay, we've reconstituted the message just fine.  Now for the fun part!
 t = Table(cellspacing=0, cellpadding=0, width='100%')
 t.AddRow([Bold(_('From:')), sender])
 row, col = t.GetCurrentRowIndex(), t.GetCurrentCellIndex()
 t.AddCellInfo(row, col-1, align='right')
 t.AddRow([Bold(_('Subject:')),
           Utils.websafe(Utils.oneline(subject, lcset))])
 t.AddCellInfo(row+1, col-1, align='right')
 t.AddRow([Bold(_('Reason:')), _(reason)])
 t.AddCellInfo(row+2, col-1, align='right')
 when = msgdata.get('received_time')
 if when:
     t.AddRow([Bold(_('Received:')), time.ctime(when)])
     t.AddCellInfo(row+2, col-1, align='right')
 # We can't use a RadioButtonArray here because horizontal placement can be
 # confusing to the user and vertical placement takes up too much
 # real-estate.  This is a hack!
 buttons = Table(cellspacing="5", cellpadding="0")
 buttons.AddRow(map(lambda x, s='&nbsp;'*5: s+x+s,
                    (_('Defer'), _('Approve'), _('Reject'), _('Discard'))))
 buttons.AddRow([Center(RadioButton(id, mm_cfg.DEFER, 1)),
                 Center(RadioButton(id, mm_cfg.APPROVE, 0)),
Beispiel #14
0
 def _decode(h):
     """Return h flattened by Utils.oneline() in the list's charset.

     Empty or missing values (anything falsy) are returned unchanged.
     `mlist` is taken from the enclosing scope.
     """
     if h:
         return Utils.oneline(h, Utils.GetCharSet(mlist.preferred_language))
     return h
Beispiel #15
0
def process(mlist, msg, msgdata=None):
    sanitize = mm_cfg.ARCHIVE_HTML_SANITIZER
    outer = True
    if msgdata is None:
        msgdata = {}
    if msgdata:
        # msgdata is available if it is in GLOBAL_PIPELINE
        # ie. not in digest or archiver
        # check if the list owner want to scrub regular delivery
        if not mlist.scrub_nondigest:
            return
    dir = calculate_attachments_dir(mlist, msg, msgdata)
    charset = None
    lcset = Utils.GetCharSet(mlist.preferred_language)
    lcset_out = Charset(lcset).output_charset or lcset
    # Now walk over all subparts of this message and scrub out various types
    format = delsp = None
    for part in msg.walk():
        ctype = part.get_content_type()
        # If the part is text/plain, we leave it alone
        if ctype == 'text/plain':
            # We need to choose a charset for the scrubbed message, so we'll
            # arbitrarily pick the charset of the first text/plain part in the
            # message.
            # MAS: Also get the RFC 3676 stuff from this part. This seems to
            # work OK for scrub_nondigest.  It will also work as far as
            # scrubbing messages for the archive is concerned, but pipermail
            # doesn't pay any attention to the RFC 3676 parameters.  The plain
            # format digest is going to be a disaster in any case as some of
            # messages will be format="flowed" and some not.  ToDigest creates
            # its own Content-Type: header for the plain digest which won't
            # have RFC 3676 parameters. If the message Content-Type: headers
            # are retained for display in the digest, the parameters will be
            # there for information, but not for the MUA. This is the best we
            # can do without having get_payload() process the parameters.
            if charset is None:
                charset = part.get_content_charset(lcset)
                format = part.get_param('format')
                delsp = part.get_param('delsp')
            # TK: if part is attached then check charset and scrub if none
            if part.get('content-disposition') and \
               not part.get_content_charset():
                omask = os.umask(002)
                try:
                    url = save_attachment(mlist, part, dir)
                finally:
                    os.umask(omask)
                filename = part.get_filename(_('not available'))
                filename = Utils.oneline(filename, lcset)
                replace_payload_by_text(part, _("""\
An embedded and charset-unspecified text was scrubbed...
Name: %(filename)s
URL: %(url)s
"""), lcset)
        elif ctype == 'text/html' and isinstance(sanitize, IntType):
            if sanitize == 0:
                if outer:
                    raise DiscardMessage
                replace_payload_by_text(part,
                                 _('HTML attachment scrubbed and removed'),
                                 # Adding charset arg and removing content-type
                                 # sets content-type to text/plain
                                 lcset)
            elif sanitize == 2:
                # By leaving it alone, Pipermail will automatically escape it
                pass
            elif sanitize == 3:
                # Pull it out as an attachment but leave it unescaped.  This
                # is dangerous, but perhaps useful for heavily moderated
                # lists.
                omask = os.umask(002)
                try:
                    url = save_attachment(mlist, part, dir, filter_html=False)
                finally:
                    os.umask(omask)
                replace_payload_by_text(part, _("""\
An HTML attachment was scrubbed...
URL: %(url)s
"""), lcset)
            else:
                # HTML-escape it and store it as an attachment, but make it
                # look a /little/ bit prettier. :(
                payload = Utils.websafe(part.get_payload(decode=True))
                # For whitespace in the margin, change spaces into
                # non-breaking spaces, and tabs into 8 of those.  Then use a
                # mono-space font.  Still looks hideous to me, but then I'd
                # just as soon discard them.
                def doreplace(s):
                    return s.replace(' ', '&nbsp;').replace('\t', '&nbsp'*8)
                lines = [doreplace(s) for s in payload.split('\n')]
                payload = '<tt>\n' + BR.join(lines) + '\n</tt>\n'
                part.set_payload(payload)
                # We're replacing the payload with the decoded payload so this
                # will just get in the way.
                del part['content-transfer-encoding']
                omask = os.umask(002)
                try:
                    url = save_attachment(mlist, part, dir, filter_html=False)
                finally:
                    os.umask(omask)
                replace_payload_by_text(part, _("""\
An HTML attachment was scrubbed...
URL: %(url)s
"""), lcset)
        elif ctype == 'message/rfc822':
            # This part contains a submessage, so it too needs scrubbing
            submsg = part.get_payload(0)
            omask = os.umask(002)
            try:
                url = save_attachment(mlist, part, dir)
            finally:
                os.umask(omask)
            subject = submsg.get('subject', _('no subject'))
            date = submsg.get('date', _('no date'))
            who = submsg.get('from', _('unknown sender'))
            size = len(str(submsg))
            replace_payload_by_text(part, _("""\
An embedded message was scrubbed...
From: %(who)s
Subject: %(subject)s
Date: %(date)s
Size: %(size)s
URL: %(url)s
"""), lcset)
        # If the message isn't a multipart, then we'll strip it out as an
        # attachment that would have to be separately downloaded.  Pipermail
        # will transform the url into a hyperlink.
        elif part.get_payload() and not part.is_multipart():
            payload = part.get_payload(decode=True)
            ctype = part.get_content_type()
            # XXX Under email 2.5, it is possible that payload will be None.
            # This can happen when you have a Content-Type: multipart/* with
            # only one part and that part has two blank lines between the
            # first boundary and the end boundary.  In email 3.0 you end up
            # with a string in the payload.  I think in this case it's safe to
            # ignore the part.
            if payload is None:
                continue
            size = len(payload)
            omask = os.umask(002)
            try:
                url = save_attachment(mlist, part, dir)
            finally:
                os.umask(omask)
            desc = part.get('content-description', _('not available'))
            desc = Utils.oneline(desc, lcset)
            filename = part.get_filename(_('not available'))
            filename = Utils.oneline(filename, lcset)
            replace_payload_by_text(part, _("""\
A non-text attachment was scrubbed...
Name: %(filename)s
Type: %(ctype)s
Size: %(size)d bytes
Desc: %(desc)s
URL: %(url)s
"""), lcset)
        outer = False
    # We still have to sanitize multipart messages to flat text because
    # Pipermail can't handle messages with list payloads.  This is a kludge;
    # def (n) clever hack ;).
    if msg.is_multipart() and sanitize <> 2:
        # By default we take the charset of the first text/plain part in the
        # message, but if there was none, we'll use the list's preferred
        # language's charset.
        if not charset or charset == 'us-ascii':
            charset = lcset_out
        else:
            # normalize to the output charset if input/output are different
            charset = Charset(charset).output_charset or charset
        # We now want to concatenate all the parts which have been scrubbed to
        # text/plain, into a single text/plain payload.  We need to make sure
        # all the characters in the concatenated string are in the same
        # encoding, so we'll use the 'replace' key in the coercion call.
        # BAW: Martin's original patch suggested we might want to try
        # generalizing to utf-8, and that's probably a good idea (eventually).
        text = []
        for part in msg.walk():
            # TK: bug-id 1099138 and multipart
            # MAS test payload - if part may fail if there are no headers.
            if not part.get_payload() or part.is_multipart():
                continue
            # All parts should be scrubbed to text/plain by now.
            partctype = part.get_content_type()
            if partctype <> 'text/plain':
                text.append(_('Skipped content of type %(partctype)s\n'))
                continue
            try:
                t = part.get_payload(decode=True) or ''
            # MAS: TypeError exception can occur if payload is None. This
            # was observed with a message that contained an attached
            # message/delivery-status part. Because of the special parsing
            # of this type, this resulted in a text/plain sub-part with a
            # null body. See bug 1430236.
            except (binascii.Error, TypeError):
                t = part.get_payload() or ''
            # TK: get_content_charset() returns 'iso-2022-jp' for internally
            # crafted (scrubbed) 'euc-jp' text part. So, first try
            # get_charset(), then get_content_charset() for the parts
            # which are already embeded in the incoming message.
            partcharset = part.get_charset()
            if partcharset:
                partcharset = str(partcharset)
            else:
                partcharset = part.get_content_charset()
            if partcharset and partcharset <> charset:
                try:
                    t = unicode(t, partcharset, 'replace')
                except (UnicodeError, LookupError, ValueError,
                        AssertionError):
                    # We can get here if partcharset is bogus in come way.
                    # Replace funny characters.  We use errors='replace'
                    t = unicode(t, 'ascii', 'replace')
                try:
                    # Should use HTML-Escape, or try generalizing to UTF-8
                    t = t.encode(charset, 'replace')
                except (UnicodeError, LookupError, ValueError,
                        AssertionError):
                    # if the message charset is bogus, use the list's.
                    t = t.encode(lcset, 'replace')
            # Separation is useful
            if isinstance(t, StringType):
                if not t.endswith('\n'):
                    t += '\n'
                text.append(t)
        # Now join the text and set the payload
        sep = _('-------------- next part --------------\n')
        # The i18n separator is in the list's charset. Coerce it to the
        # message charset.
        try:
            s = unicode(sep, lcset, 'replace')
            sep = s.encode(charset, 'replace')
        except (UnicodeError, LookupError, ValueError,
                AssertionError):
            pass
        replace_payload_by_text(msg, sep.join(text), charset)
        if format:
            msg.set_param('Format', format)
        if delsp:
            msg.set_param('DelSp', delsp)
    return msg
Beispiel #16
0
def send_i18n_digests(mlist, mboxfp):
    mbox = Mailbox(mboxfp)
    # Prepare common information (first lang/charset)
    lang = mlist.preferred_language
    lcset = Utils.GetCharSet(lang)
    lcset_out = Charset(lcset).output_charset or lcset
    # Common Information (contd)
    realname = mlist.real_name
    volume = mlist.volume
    issue = mlist.next_digest_number
    digestid = _('%(realname)s Digest, Vol %(volume)d, Issue %(issue)d')
    digestsubj = Header(digestid, lcset, header_name='Subject')
    # Set things up for the MIME digest.  Only headers not added by
    # CookHeaders need be added here.
    # Date/Message-ID should be added here also.
    mimemsg = Message.Message()
    mimemsg['Content-Type'] = 'multipart/mixed'
    mimemsg['MIME-Version'] = '1.0'
    mimemsg['From'] = mlist.GetRequestEmail()
    mimemsg['Subject'] = digestsubj
    mimemsg['To'] = mlist.GetListEmail()
    mimemsg['Reply-To'] = mlist.GetListEmail()
    mimemsg['Date'] = formatdate(localtime=1)
    mimemsg['Message-ID'] = Utils.unique_message_id(mlist)
    # Set things up for the rfc1153 digest
    plainmsg = StringIO()
    rfc1153msg = Message.Message()
    rfc1153msg['From'] = mlist.GetRequestEmail()
    rfc1153msg['Subject'] = digestsubj
    rfc1153msg['To'] = mlist.GetListEmail()
    rfc1153msg['Reply-To'] = mlist.GetListEmail()
    rfc1153msg['Date'] = formatdate(localtime=1)
    rfc1153msg['Message-ID'] = Utils.unique_message_id(mlist)
    separator70 = '-' * 70
    separator30 = '-' * 30
    # In the rfc1153 digest, the masthead contains the digest boilerplate plus
    # any digest header.  In the MIME digests, the masthead and digest header
    # are separate MIME subobjects.  In either case, it's the first thing in
    # the digest, and we can calculate it now, so go ahead and add it now.
    mastheadtxt = Utils.maketext(
        'masthead.txt', {
            'real_name': mlist.real_name,
            'got_list_email': mlist.GetListEmail(),
            'got_listinfo_url': mlist.GetScriptURL('listinfo', absolute=1),
            'got_request_email': mlist.GetRequestEmail(),
            'got_owner_email': mlist.GetOwnerEmail(),
        },
        mlist=mlist)
    # MIME
    masthead = MIMEText(mastheadtxt, _charset=lcset)
    masthead['Content-Description'] = digestid
    mimemsg.attach(masthead)
    # RFC 1153
    print >> plainmsg, mastheadtxt
    print >> plainmsg
    # Now add the optional digest header but only if more than whitespace.
    if re.sub('\s', '', mlist.digest_header):
        headertxt = decorate(mlist, mlist.digest_header, _('digest header'))
        # MIME
        header = MIMEText(headertxt, _charset=lcset)
        header['Content-Description'] = _('Digest Header')
        mimemsg.attach(header)
        # RFC 1153
        print >> plainmsg, headertxt
        print >> plainmsg
    # Now we have to cruise through all the messages accumulated in the
    # mailbox file.  We can't add these messages to the plainmsg and mimemsg
    # yet, because we first have to calculate the table of contents
    # (i.e. grok out all the Subjects).  Store the messages in a list until
    # we're ready for them.
    #
    # Meanwhile prepare things for the table of contents
    toc = StringIO()
    print >> toc, _("Today's Topics:\n")
    # Now cruise through all the messages in the mailbox of digest messages,
    # building the MIME payload and core of the RFC 1153 digest.  We'll also
    # accumulate Subject: headers and authors for the table-of-contents.
    messages = []
    msgcount = 0
    msg = mbox.next()
    while msg is not None:
        if msg == '':
            # It was an unparseable message
            msg = mbox.next()
            continue
        msgcount += 1
        messages.append(msg)
        # Get the Subject header
        msgsubj = msg.get('subject', _('(no subject)'))
        subject = Utils.oneline(msgsubj, lcset)
        # Don't include the redundant subject prefix in the toc
        mo = re.match('(re:? *)?(%s)' % re.escape(mlist.subject_prefix),
                      subject, re.IGNORECASE)
        if mo:
            subject = subject[:mo.start(2)] + subject[mo.end(2):]
        username = ''
        addresses = getaddresses([Utils.oneline(msg.get('from', ''), lcset)])
        # Take only the first author we find
        if isinstance(addresses, ListType) and addresses:
            username = addresses[0][0]
            if not username:
                username = addresses[0][1]
        if username:
            username = '******' % username
        # Put count and Wrap the toc subject line
        wrapped = Utils.wrap('%2d. %s' % (msgcount, subject), 65)
        slines = wrapped.split('\n')
        # See if the user's name can fit on the last line
        if len(slines[-1]) + len(username) > 70:
            slines.append(username)
        else:
            slines[-1] += username
        # Add this subject to the accumulating topics
        first = True
        for line in slines:
            if first:
                print >> toc, ' ', line
                first = False
            else:
                print >> toc, '     ', line.lstrip()
        # We do not want all the headers of the original message to leak
        # through in the digest messages.  For this phase, we'll leave the
        # same set of headers in both digests, i.e. those required in RFC 1153
        # plus a couple of other useful ones.  We also need to reorder the
        # headers according to RFC 1153.  Later, we'll strip out headers for
        # for the specific MIME or plain digests.
        keeper = {}
        all_keepers = {}
        for header in (mm_cfg.MIME_DIGEST_KEEP_HEADERS +
                       mm_cfg.PLAIN_DIGEST_KEEP_HEADERS):
            all_keepers[header] = True
        all_keepers = all_keepers.keys()
        for keep in all_keepers:
            keeper[keep] = msg.get_all(keep, [])
        # Now remove all unkempt headers :)
        for header in msg.keys():
            del msg[header]
        # And add back the kept header in the RFC 1153 designated order
        for keep in all_keepers:
            for field in keeper[keep]:
                msg[keep] = field
        # And a bit of extra stuff
        msg['Message'] = ` msgcount `
        # Get the next message in the digest mailbox
        msg = mbox.next()
    # Now we're finished with all the messages in the digest.  First do some
    # sanity checking and then on to adding the toc.
    if msgcount == 0:
        # Why did we even get here?
        return
    toctext = to_cset_out(toc.getvalue(), lcset)
    # MIME
    tocpart = MIMEText(toctext, _charset=lcset)
    tocpart['Content-Description'] = _(
        "Today's Topics (%(msgcount)d messages)")
    mimemsg.attach(tocpart)
    # RFC 1153
    print >> plainmsg, toctext
    print >> plainmsg
    # For RFC 1153 digests, we now need the standard separator
    print >> plainmsg, separator70
    print >> plainmsg
    # Now go through and add each message
    mimedigest = MIMEBase('multipart', 'digest')
    mimemsg.attach(mimedigest)
    first = True
    for msg in messages:
        # MIME.  Make a copy of the message object since the rfc1153
        # processing scrubs out attachments.
        mimedigest.attach(MIMEMessage(copy.deepcopy(msg)))
        # rfc1153
        if first:
            first = False
        else:
            print >> plainmsg, separator30
            print >> plainmsg
        # Use Mailman.Handlers.Scrubber.process() to get plain text
        try:
            msg = scrubber(mlist, msg)
        except Errors.DiscardMessage:
            print >> plainmsg, _('[Message discarded by content filter]')
            continue
        # Honor the default setting
        for h in mm_cfg.PLAIN_DIGEST_KEEP_HEADERS:
            if msg[h]:
                uh = Utils.wrap('%s: %s' % (h, Utils.oneline(msg[h], lcset)))
                uh = '\n\t'.join(uh.split('\n'))
                print >> plainmsg, uh
        print >> plainmsg
        # If decoded payload is empty, this may be multipart message.
        # -- just stringfy it.
        payload = msg.get_payload(decode=True) \
                  or msg.as_string().split('\n\n',1)[1]
        mcset = msg.get_content_charset('')
        if mcset and mcset <> lcset and mcset <> lcset_out:
            try:
                payload = unicode(payload, mcset,
                                  'replace').encode(lcset, 'replace')
            except (UnicodeError, LookupError):
                # TK: Message has something unknown charset.
                #     _out means charset in 'outer world'.
                payload = unicode(payload, lcset_out,
                                  'replace').encode(lcset, 'replace')
        print >> plainmsg, payload
        if not payload.endswith('\n'):
            print >> plainmsg
    # Now add the footer but only if more than whitespace.
    if re.sub('\s', '', mlist.digest_footer):
        footertxt = decorate(mlist, mlist.digest_footer, _('digest footer'))
        # MIME
        footer = MIMEText(footertxt, _charset=lcset)
        footer['Content-Description'] = _('Digest Footer')
        mimemsg.attach(footer)
        # RFC 1153
        # MAS: There is no real place for the digest_footer in an RFC 1153
        # compliant digest, so add it as an additional message with
        # Subject: Digest Footer
        print >> plainmsg, separator30
        print >> plainmsg
        print >> plainmsg, 'Subject: ' + _('Digest Footer')
        print >> plainmsg
        print >> plainmsg, footertxt
        print >> plainmsg
        print >> plainmsg, separator30
        print >> plainmsg
    # Do the last bit of stuff for each digest type
    signoff = _('End of ') + digestid
    # MIME
    # BAW: This stuff is outside the normal MIME goo, and it's what the old
    # MIME digester did.  No one seemed to complain, probably because you
    # won't see it in an MUA that can't display the raw message.  We've never
    # got complaints before, but if we do, just wax this.  It's primarily
    # included for (marginally useful) backwards compatibility.
    mimemsg.postamble = signoff
    # rfc1153
    print >> plainmsg, signoff
    print >> plainmsg, '*' * len(signoff)
    # Do our final bit of housekeeping, and then send each message to the
    # outgoing queue for delivery.
    mlist.next_digest_number += 1
    virginq = get_switchboard(mm_cfg.VIRGINQUEUE_DIR)
    # Calculate the recipients lists
    plainrecips = []
    mimerecips = []
    drecips = mlist.getDigestMemberKeys() + mlist.one_last_digest.keys()
    for user in mlist.getMemberCPAddresses(drecips):
        # user might be None if someone who toggled off digest delivery
        # subsequently unsubscribed from the mailing list.  Also, filter out
        # folks who have disabled delivery.
        if user is None or mlist.getDeliveryStatus(user) <> ENABLED:
            continue
        # Otherwise, decide whether they get MIME or RFC 1153 digests
        if mlist.getMemberOption(user, mm_cfg.DisableMime):
            plainrecips.append(user)
        else:
            mimerecips.append(user)
    # Zap this since we're now delivering the last digest to these folks.
    mlist.one_last_digest.clear()
    # MIME
    virginq.enqueue(mimemsg,
                    recips=mimerecips,
                    listname=mlist.internal_name(),
                    isdigest=True)
    # RFC 1153
    rfc1153msg.set_payload(to_cset_out(plainmsg.getvalue(), lcset), lcset)
    virginq.enqueue(rfc1153msg,
                    recips=plainrecips,
                    listname=mlist.internal_name(),
                    isdigest=True)
Beispiel #17
0
def save_attachment(mlist, msg, dir, filter_html=True):
    fsdir = os.path.join(mlist.archive_dir(), dir)
    makedirs(fsdir)
    # Figure out the attachment type and get the decoded data
    decodedpayload = msg.get_payload(decode=True)
    # BAW: mimetypes ought to handle non-standard, but commonly found types,
    # e.g. image/jpg (should be image/jpeg).  For now we just store such
    # things as application/octet-streams since that seems the safest.
    ctype = msg.get_content_type()
    # i18n file name is encoded
    lcset = Utils.GetCharSet(mlist.preferred_language)
    filename = Utils.oneline(msg.get_filename(''), lcset)
    filename, fnext = os.path.splitext(filename)
    # For safety, we should confirm this is valid ext for content-type
    # but we can use fnext if we introduce fnext filtering
    if mm_cfg.SCRUBBER_USE_ATTACHMENT_FILENAME_EXTENSION:
        # HTML message doesn't have filename :-(
        ext = fnext or guess_extension(ctype, fnext)
    else:
        ext = guess_extension(ctype, fnext)
    if not ext:
        # We don't know what it is, so assume it's just a shapeless
        # application/octet-stream, unless the Content-Type: is
        # message/rfc822, in which case we know we'll coerce the type to
        # text/plain below.
        if ctype == 'message/rfc822':
            ext = '.txt'
        else:
            ext = '.bin'
    # Allow only alphanumerics, dash, underscore, and dot
    ext = sre.sub('', ext)
    path = None
    # We need a lock to calculate the next attachment number
    lockfile = os.path.join(fsdir, 'attachments.lock')
    lock = LockFile.LockFile(lockfile)
    lock.lock()
    try:
        # Now base the filename on what's in the attachment, uniquifying it if
        # necessary.
        if not filename or mm_cfg.SCRUBBER_DONT_USE_ATTACHMENT_FILENAME:
            filebase = 'attachment'
        else:
            # Sanitize the filename given in the message headers
            parts = pre.split(filename)
            filename = parts[-1]
            # Strip off leading dots
            filename = dre.sub('', filename)
            # Allow only alphanumerics, dash, underscore, and dot
            filename = sre.sub('', filename)
            # If the filename's extension doesn't match the type we guessed,
            # which one should we go with?  For now, let's go with the one we
            # guessed so attachments can't lie about their type.  Also, if the
            # filename /has/ no extension, then tack on the one we guessed.
            # The extension was removed from the name above.
            filebase = filename
        # Now we're looking for a unique name for this file on the file
        # system.  If msgdir/filebase.ext isn't unique, we'll add a counter
        # after filebase, e.g. msgdir/filebase-cnt.ext
        counter = 0
        extra = ''
        while True:
            path = os.path.join(fsdir, filebase + extra + ext)
            # Generally it is not a good idea to test for file existance
            # before just trying to create it, but the alternatives aren't
            # wonderful (i.e. os.open(..., O_CREAT | O_EXCL) isn't
            # NFS-safe).  Besides, we have an exclusive lock now, so we're
            # guaranteed that no other process will be racing with us.
            if os.path.exists(path):
                counter += 1
                extra = '-%04d' % counter
            else:
                break
    finally:
        lock.unlock()
    # `path' now contains the unique filename for the attachment.  There's
    # just one more step we need to do.  If the part is text/html and
    # ARCHIVE_HTML_SANITIZER is a string (which it must be or we wouldn't be
    # here), then send the attachment through the filter program for
    # sanitization
    if filter_html and ctype == 'text/html':
        base, ext = os.path.splitext(path)
        tmppath = base + '-tmp' + ext
        fp = open(tmppath, 'w')
        try:
            fp.write(decodedpayload)
            fp.close()
            cmd = mm_cfg.ARCHIVE_HTML_SANITIZER % {'filename' : tmppath}
            progfp = os.popen(cmd, 'r')
            decodedpayload = progfp.read()
            status = progfp.close()
            if status:
                syslog('error',
                       'HTML sanitizer exited with non-zero status: %s',
                       status)
        finally:
            os.unlink(tmppath)
        # BAW: Since we've now sanitized the document, it should be plain
        # text.  Blarg, we really want the sanitizer to tell us what the type
        # if the return data is. :(
        ext = '.txt'
        path = base + '.txt'
    # Is it a message/rfc822 attachment?
    elif ctype == 'message/rfc822':
        submsg = msg.get_payload()
        # BAW: I'm sure we can eventually do better than this. :(
        decodedpayload = Utils.websafe(str(submsg))
    fp = open(path, 'w')
    fp.write(decodedpayload)
    fp.close()
    # Now calculate the url
    baseurl = mlist.GetBaseArchiveURL()
    # Private archives will likely have a trailing slash.  Normalize.
    if baseurl[-1] <> '/':
        baseurl += '/'
    # A trailing space in url string may save users who are using
    # RFC-1738 compliant MUA (Not Mozilla).
    # Trailing space will definitely be a problem with format=flowed.
    # Bracket the URL instead.
    url = '<' + baseurl + '%s/%s%s%s>' % (dir, filebase, extra, ext)
    return url
Beispiel #18
0
         pass
     # Queue the file for delivery by qrunner.  Trying to deliver the
     # message directly here can lead to a huge delay in web
     # turnaround.  Log the moderation and add a header.
     msg['X-Mailman-Approved-At'] = email.Utils.formatdate(localtime=1)
     syslog('vette', '%s: held message approved, message-id: %s',
            self.internal_name(), msg.get('message-id', 'n/a'))
     # Stick the message back in the incoming queue for further
     # processing.
     inq = get_switchboard(mm_cfg.INQUEUE_DIR)
     inq.enqueue(msg, _metadata=msgdata)
 elif value == mm_cfg.REJECT:
     # Rejected
     rejection = 'Refused'
     lang = self.getMemberLanguage(sender)
     subject = Utils.oneline(subject, Utils.GetCharSet(lang))
     self.__refuse(_('Posting of your message titled "%(subject)s"'),
                   sender,
                   comment or _('[No reason given]'),
                   lang=lang)
 else:
     assert value == mm_cfg.DISCARD
     # Discarded
     rejection = 'Discarded'
 # Forward the message
 if forward and addr:
     # If we've approved the message, we need to be sure to craft a
     # completely unique second message for the forwarding operation,
     # since we don't want to share any state or information with the
     # normal delivery.
     try:
def send_i18n_digests(mlist, mboxfp):
    mbox = Mailbox(mboxfp)
    # Prepare common information (first lang/charset)
    lang = mlist.preferred_language
    lcset = Utils.GetCharSet(lang)
    lcset_out = Charset(lcset).output_charset or lcset
    # Common Information (contd)
    realname = mlist.real_name
    volume = mlist.volume
    issue = mlist.next_digest_number
    digestid = _('%(realname)s Digest, Vol %(volume)d, Issue %(issue)d')
    digestsubj = Header(digestid, lcset, header_name='Subject')
    # Set things up for the MIME digest.  Only headers not added by
    # CookHeaders need be added here.
    # Date/Message-ID should be added here also.
    mimemsg = Message.Message()
    mimemsg['Content-Type'] = 'multipart/mixed'
    mimemsg['MIME-Version'] = '1.0'
    mimemsg['From'] = mlist.GetRequestEmail()
    mimemsg['Subject'] = digestsubj
    mimemsg['To'] = mlist.GetListEmail()
    mimemsg['Reply-To'] = mlist.GetListEmail()
    mimemsg['Date'] = formatdate(localtime=1)
    mimemsg['Message-ID'] = Utils.unique_message_id(mlist)
    # Set things up for the rfc1153 digest
    plainmsg = StringIO()
    rfc1153msg = Message.Message()
    rfc1153msg['From'] = mlist.GetRequestEmail()
    rfc1153msg['Subject'] = digestsubj
    rfc1153msg['To'] = mlist.GetListEmail()
    rfc1153msg['Reply-To'] = mlist.GetListEmail()
    rfc1153msg['Date'] = formatdate(localtime=1)
    rfc1153msg['Message-ID'] = Utils.unique_message_id(mlist)
    separator70 = '-' * 70
    separator30 = '-' * 30
    # In the rfc1153 digest, the masthead contains the digest boilerplate plus
    # any digest header.  In the MIME digests, the masthead and digest header
    # are separate MIME subobjects.  In either case, it's the first thing in
    # the digest, and we can calculate it now, so go ahead and add it now.
    mastheadtxt = Utils.maketext(
        'masthead.txt',
        {'real_name' :        mlist.real_name,
         'got_list_email':    mlist.GetListEmail(),
         'got_listinfo_url':  mlist.GetScriptURL('listinfo', absolute=1),
         'got_request_email': mlist.GetRequestEmail(),
         'got_owner_email':   mlist.GetOwnerEmail(),
         }, mlist=mlist)
    # MIME
    masthead = MIMEText(mastheadtxt, _charset=lcset)
    masthead['Content-Description'] = digestid
    mimemsg.attach(masthead)
    # RFC 1153
    print >> plainmsg, mastheadtxt
    print >> plainmsg
    # Now add the optional digest header but only if more than whitespace.
    if re.sub('\s', '', mlist.digest_header):
        headertxt = decorate(mlist, mlist.digest_header, _('digest header'))
        # MIME
        header = MIMEText(headertxt, _charset=lcset)
        header['Content-Description'] = _('Digest Header')
        mimemsg.attach(header)
        # RFC 1153
        print >> plainmsg, headertxt
        print >> plainmsg
    # Now we have to cruise through all the messages accumulated in the
    # mailbox file.  We can't add these messages to the plainmsg and mimemsg
    # yet, because we first have to calculate the table of contents
    # (i.e. grok out all the Subjects).  Store the messages in a list until
    # we're ready for them.
    #
    # Meanwhile prepare things for the table of contents
    toc = StringIO()
    print >> toc, _("Today's Topics:\n")
    # Now cruise through all the messages in the mailbox of digest messages,
    # building the MIME payload and core of the RFC 1153 digest.  We'll also
    # accumulate Subject: headers and authors for the table-of-contents.
    messages = []
    msgcount = 0
    msg = mbox.next()
    while msg is not None:
        if msg == '':
            # It was an unparseable message
            msg = mbox.next()
            continue
        msgcount += 1
        messages.append(msg)
        # Get the Subject header
        msgsubj = msg.get('subject', _('(no subject)'))
        subject = Utils.oneline(msgsubj, lcset)
        # Don't include the redundant subject prefix in the toc
        mo = re.match('(re:? *)?(%s)' % re.escape(mlist.subject_prefix),
                      subject, re.IGNORECASE)
        if mo:
            subject = subject[:mo.start(2)] + subject[mo.end(2):]
        username = ''
        addresses = getaddresses([Utils.oneline(msg.get('from', ''), lcset)])
        # Take only the first author we find
        if isinstance(addresses, ListType) and addresses:
            username = addresses[0][0]
            if not username:
                username = addresses[0][1]
        if username:
            username = '******' % username
        # Put count and Wrap the toc subject line
        wrapped = Utils.wrap('%2d. %s' % (msgcount, subject), 65)
        slines = wrapped.split('\n')
        # See if the user's name can fit on the last line
        if len(slines[-1]) + len(username) > 70:
            slines.append(username)
        else:
            slines[-1] += username
        # Add this subject to the accumulating topics
        first = True
        for line in slines:
            if first:
                print >> toc, ' ', line
                first = False
            else:
                print >> toc, '     ', line.lstrip()
        # We do not want all the headers of the original message to leak
        # through in the digest messages.  For this phase, we'll leave the
        # same set of headers in both digests, i.e. those required in RFC 1153
        # plus a couple of other useful ones.  We also need to reorder the
        # headers according to RFC 1153.  Later, we'll strip out headers for
        # for the specific MIME or plain digests.
        keeper = {}
        all_keepers = {}
        for header in (mm_cfg.MIME_DIGEST_KEEP_HEADERS +
                       mm_cfg.PLAIN_DIGEST_KEEP_HEADERS):
            all_keepers[header] = True
        all_keepers = all_keepers.keys()
        for keep in all_keepers:
            keeper[keep] = msg.get_all(keep, [])
        # Now remove all unkempt headers :)
        for header in msg.keys():
            del msg[header]
        # And add back the kept header in the RFC 1153 designated order
        for keep in all_keepers:
            for field in keeper[keep]:
                msg[keep] = field
        # And a bit of extra stuff
        msg['Message'] = `msgcount`
        # Get the next message in the digest mailbox
        msg = mbox.next()
    # Now we're finished with all the messages in the digest.  First do some
    # sanity checking and then on to adding the toc.
    if msgcount == 0:
        # Why did we even get here?
        return
    toctext = to_cset_out(toc.getvalue(), lcset)
    # MIME
    tocpart = MIMEText(toctext, _charset=lcset)
    tocpart['Content-Description']= _("Today's Topics (%(msgcount)d messages)")
    mimemsg.attach(tocpart)
    # RFC 1153
    print >> plainmsg, toctext
    print >> plainmsg
    # For RFC 1153 digests, we now need the standard separator
    print >> plainmsg, separator70
    print >> plainmsg
    # Now go through and add each message
    mimedigest = MIMEBase('multipart', 'digest')
    mimemsg.attach(mimedigest)
    first = True
    for msg in messages:
        # MIME.  Make a copy of the message object since the rfc1153
        # processing scrubs out attachments.
        mimedigest.attach(MIMEMessage(copy.deepcopy(msg)))
        # rfc1153
        if first:
            first = False
        else:
            print >> plainmsg, separator30
            print >> plainmsg
        # Use Mailman.Handlers.Scrubber.process() to get plain text
        try:
            msg = scrubber(mlist, msg)
        except Errors.DiscardMessage:
            print >> plainmsg, _('[Message discarded by content filter]')
            continue
        # Honor the default setting
        for h in mm_cfg.PLAIN_DIGEST_KEEP_HEADERS:
            if msg[h]:
                uh = Utils.wrap('%s: %s' % (h, Utils.oneline(msg[h], lcset)))
                uh = '\n\t'.join(uh.split('\n'))
                print >> plainmsg, uh
        print >> plainmsg
        # If decoded payload is empty, this may be multipart message.
        # -- just stringfy it.
        payload = msg.get_payload(decode=True) \
                  or msg.as_string().split('\n\n',1)[1]
        mcset = msg.get_content_charset('')
        if mcset and mcset <> lcset and mcset <> lcset_out:
            try:
                payload = unicode(payload, mcset, 'replace'
                          ).encode(lcset, 'replace')
            except (UnicodeError, LookupError):
                # TK: Message has something unknown charset.
                #     _out means charset in 'outer world'.
                payload = unicode(payload, lcset_out, 'replace'
                          ).encode(lcset, 'replace')
        print >> plainmsg, payload
        if not payload.endswith('\n'):
            print >> plainmsg
    # Now add the footer but only if more than whitespace.
    if re.sub('\s', '', mlist.digest_footer):
        footertxt = decorate(mlist, mlist.digest_footer, _('digest footer'))
        # MIME
        footer = MIMEText(footertxt, _charset=lcset)
        footer['Content-Description'] = _('Digest Footer')
        mimemsg.attach(footer)
        # RFC 1153
        # MAS: There is no real place for the digest_footer in an RFC 1153
        # compliant digest, so add it as an additional message with
        # Subject: Digest Footer
        print >> plainmsg, separator30
        print >> plainmsg
        print >> plainmsg, 'Subject: ' + _('Digest Footer')
        print >> plainmsg
        print >> plainmsg, footertxt
        print >> plainmsg
        print >> plainmsg, separator30
        print >> plainmsg
    # Do the last bit of stuff for each digest type
    signoff = _('End of ') + digestid
    # MIME
    # BAW: This stuff is outside the normal MIME goo, and it's what the old
    # MIME digester did.  No one seemed to complain, probably because you
    # won't see it in an MUA that can't display the raw message.  We've never
    # got complaints before, but if we do, just wax this.  It's primarily
    # included for (marginally useful) backwards compatibility.
    mimemsg.postamble = signoff
    # rfc1153
    print >> plainmsg, signoff
    print >> plainmsg, '*' * len(signoff)
    # Do our final bit of housekeeping, and then send each message to the
    # outgoing queue for delivery.
    mlist.next_digest_number += 1
    virginq = get_switchboard(mm_cfg.VIRGINQUEUE_DIR)
    # Calculate the recipients lists
    plainrecips = []
    mimerecips = []
    drecips = mlist.getDigestMemberKeys() + mlist.one_last_digest.keys()
    for user in mlist.getMemberCPAddresses(drecips):
        # user might be None if someone who toggled off digest delivery
        # subsequently unsubscribed from the mailing list.  Also, filter out
        # folks who have disabled delivery.
        if user is None or mlist.getDeliveryStatus(user) <> ENABLED:
            continue
        # Otherwise, decide whether they get MIME or RFC 1153 digests
        if mlist.getMemberOption(user, mm_cfg.DisableMime):
            plainrecips.append(user)
        else:
            mimerecips.append(user)
    # Zap this since we're now delivering the last digest to these folks.
    mlist.one_last_digest.clear()
    # MIME
    virginq.enqueue(mimemsg,
                    recips=mimerecips,
                    listname=mlist.internal_name(),
                    isdigest=True)
    # RFC 1153
    rfc1153msg.set_payload(to_cset_out(plainmsg.getvalue(), lcset), lcset)
    virginq.enqueue(rfc1153msg,
                    recips=plainrecips,
                    listname=mlist.internal_name(),
                    isdigest=True)
Beispiel #20
0
def show_helds_overview(mlist, form):
    # Sort the held messages by sender
    bysender = helds_by_sender(mlist)
    if not bysender:
        return 0
    form.AddItem('<hr>')
    form.AddItem(Center(Header(2, _('Held Messages'))))
    # Add the by-sender overview tables
    admindburl = mlist.GetScriptURL('admindb', absolute=1)
    table = Table(border=0)
    form.AddItem(table)
    senders = bysender.keys()
    senders.sort()
    for sender in senders:
        qsender = quote_plus(sender)
        esender = Utils.websafe(sender)
        senderurl = admindburl + '?sender=' + qsender
        # The encompassing sender table
        stable = Table(border=1)
        stable.AddRow([Center(Bold(_('From:')).Format() + esender)])
        stable.AddCellInfo(stable.GetCurrentRowIndex(), 0, colspan=2)
        left = Table(border=0)
        left.AddRow([_('Action to take on all these held messages:')])
        left.AddCellInfo(left.GetCurrentRowIndex(), 0, colspan=2)
        btns = hacky_radio_buttons(
            'senderaction-' + qsender,
            (_('Defer'), _('Accept'), _('Reject'), _('Discard')),
            (mm_cfg.DEFER, mm_cfg.APPROVE, mm_cfg.REJECT, mm_cfg.DISCARD),
            (1, 0, 0, 0))
        left.AddRow([btns])
        left.AddCellInfo(left.GetCurrentRowIndex(), 0, colspan=2)
        left.AddRow([
            CheckBox('senderpreserve-' + qsender, 1).Format() + '&nbsp;' +
            _('Preserve messages for the site administrator')
        ])
        left.AddCellInfo(left.GetCurrentRowIndex(), 0, colspan=2)
        left.AddRow([
            CheckBox('senderforward-' + qsender, 1).Format() + '&nbsp;' +
            _('Forward messages (individually) to:')
        ])
        left.AddCellInfo(left.GetCurrentRowIndex(), 0, colspan=2)
        left.AddRow([
            TextBox('senderforwardto-' + qsender, value=mlist.GetOwnerEmail())
        ])
        left.AddCellInfo(left.GetCurrentRowIndex(), 0, colspan=2)
        # If the sender is a member and the message is being held due to a
        # moderation bit, give the admin a chance to clear the member's mod
        # bit.  If this sender is not a member and is not already on one of
        # the sender filters, then give the admin a chance to add this sender
        # to one of the filters.
        if mlist.isMember(sender):
            if mlist.getMemberOption(sender, mm_cfg.Moderate):
                left.AddRow([
                    CheckBox('senderclearmodp-' + qsender, 1).Format() +
                    '&nbsp;' + _("Clear this member's <em>moderate</em> flag")
                ])
            else:
                left.AddRow(
                    [_('<em>The sender is now a member of this list</em>')])
            left.AddCellInfo(left.GetCurrentRowIndex(), 0, colspan=2)
        elif sender not in (mlist.accept_these_nonmembers +
                            mlist.hold_these_nonmembers +
                            mlist.reject_these_nonmembers +
                            mlist.discard_these_nonmembers):
            left.AddRow([
                CheckBox('senderfilterp-' + qsender, 1).Format() + '&nbsp;' +
                _('Add <b>%(esender)s</b> to one of these sender filters:')
            ])
            left.AddCellInfo(left.GetCurrentRowIndex(), 0, colspan=2)
            btns = hacky_radio_buttons(
                'senderfilter-' + qsender,
                (_('Accepts'), _('Holds'), _('Rejects'), _('Discards')),
                (mm_cfg.ACCEPT, mm_cfg.HOLD, mm_cfg.REJECT, mm_cfg.DISCARD),
                (0, 0, 0, 1))
            left.AddRow([btns])
            left.AddCellInfo(left.GetCurrentRowIndex(), 0, colspan=2)
            if sender not in mlist.ban_list:
                left.AddRow([
                    CheckBox('senderbanp-' + qsender, 1).Format() + '&nbsp;' +
                    _("""Ban <b>%(esender)s</b> from ever subscribing to this
                    mailing list""")
                ])
                left.AddCellInfo(left.GetCurrentRowIndex(), 0, colspan=2)
        right = Table(border=0)
        right.AddRow([
            _("""Click on the message number to view the individual
            message, or you can """) +
            Link(senderurl, _('view all messages from %(esender)s')).Format()
        ])
        right.AddCellInfo(right.GetCurrentRowIndex(), 0, colspan=2)
        right.AddRow(['&nbsp;', '&nbsp;'])
        counter = 1
        for id in bysender[sender]:
            info = mlist.GetRecord(id)
            ptime, sender, subject, reason, filename, msgdata = info
            # BAW: This is really the size of the message pickle, which should
            # be close, but won't be exact.  Sigh, good enough.
            try:
                size = os.path.getsize(os.path.join(mm_cfg.DATA_DIR, filename))
            except OSError, e:
                if e.errno <> errno.ENOENT: raise
                # This message must have gotten lost, i.e. it's already been
                # handled by the time we got here.
                mlist.HandleRequest(id, mm_cfg.DISCARD)
                continue
            dispsubj = Utils.oneline(
                subject, Utils.GetCharSet(mlist.preferred_language))
            t = Table(border=0)
            t.AddRow([
                Link(admindburl + '?msgid=%d' % id, '[%d]' % counter),
                Bold(_('Subject:')),
                Utils.websafe(dispsubj)
            ])
            t.AddRow(['&nbsp;', Bold(_('Size:')), str(size) + _(' bytes')])
            if reason:
                reason = _(reason)
            else:
                reason = _('not available')
            t.AddRow(['&nbsp;', Bold(_('Reason:')), reason])
            # Include the date we received the message, if available
            when = msgdata.get('received_time')
            if when:
                t.AddRow(['&nbsp;', Bold(_('Received:')), time.ctime(when)])
            t.AddRow([InputObj(qsender, 'hidden', str(id), False).Format()])
            counter += 1
            right.AddRow([t])
        stable.AddRow([left, right])
        table.AddRow([stable])
 def _decode(h):
     if not h:
         return h
     return Utils.oneline(h, Utils.GetCharSet(mlist.preferred_language))
def save_attachment(mlist, msg, dir, filter_html=True):
    """Store one message part in the list archive and return its URL.

    The payload of `msg` is written below mlist.archive_dir()/dir under a
    sanitized file name that is made unique with a numeric suffix.

    Arguments:
        mlist -- the mailing list; supplies archive_dir(),
                 preferred_language and GetBaseArchiveURL().
        msg -- the message part whose payload is saved.
        dir -- directory (relative to the archive dir, and also used in
               the returned URL) that receives the file.
        filter_html -- when true, a text/html part is first piped through
               the mm_cfg.ARCHIVE_HTML_SANITIZER command and the command's
               output is stored with a .txt extension instead.

    Returns the attachment's URL as a string wrapped in angle brackets.
    """
    fsdir = os.path.join(mlist.archive_dir(), dir)
    makedirs(fsdir)
    # Figure out the attachment type and get the decoded data
    decodedpayload = msg.get_payload(decode=True)
    # BAW: mimetypes ought to handle non-standard, but commonly found types,
    # e.g. image/jpg (should be image/jpeg).  For now we just store such
    # things as application/octet-streams since that seems the safest.
    ctype = msg.get_content_type()
    # i18n file name is encoded
    lcset = Utils.GetCharSet(mlist.preferred_language)
    filename = Utils.oneline(msg.get_filename(''), lcset)
    filename, fnext = os.path.splitext(filename)
    # For safety, we should confirm this is valid ext for content-type
    # but we can use fnext if we introduce fnext filtering
    if mm_cfg.SCRUBBER_USE_ATTACHMENT_FILENAME_EXTENSION:
        # HTML message doesn't have filename :-(
        ext = fnext or guess_extension(ctype, fnext)
    else:
        ext = guess_extension(ctype, fnext)
    if not ext:
        # We don't know what it is, so assume it's just a shapeless
        # application/octet-stream, unless the Content-Type: is
        # message/rfc822, in which case we know we'll coerce the type to
        # text/plain below.
        if ctype == 'message/rfc822':
            ext = '.txt'
        else:
            ext = '.bin'
    # Allow only alphanumerics, dash, underscore, and dot
    ext = sre.sub('', ext)
    path = None
    # We need a lock to calculate the next attachment number
    lockfile = os.path.join(fsdir, 'attachments.lock')
    lock = LockFile.LockFile(lockfile)
    lock.lock()
    try:
        # Now base the filename on what's in the attachment, uniquifying it if
        # necessary.
        if not filename or mm_cfg.SCRUBBER_DONT_USE_ATTACHMENT_FILENAME:
            filebase = 'attachment'
        else:
            # Sanitize the filename given in the message headers
            parts = pre.split(filename)
            filename = parts[-1]
            # Strip off leading dots
            filename = dre.sub('', filename)
            # Allow only alphanumerics, dash, underscore, and dot
            filename = sre.sub('', filename)
            # If the filename's extension doesn't match the type we guessed,
            # which one should we go with?  For now, let's go with the one we
            # guessed so attachments can't lie about their type.  Also, if the
            # filename /has/ no extension, then tack on the one we guessed.
            # The extension was removed from the name above.
            filebase = filename
        # Now we're looking for a unique name for this file on the file
        # system.  If msgdir/filebase.ext isn't unique, we'll add a counter
        # after filebase, e.g. msgdir/filebase-cnt.ext
        counter = 0
        extra = ''
        while True:
            path = os.path.join(fsdir, filebase + extra + ext)
            # Generally it is not a good idea to test for file existence
            # before just trying to create it, but the alternatives aren't
            # wonderful (i.e. os.open(..., O_CREAT | O_EXCL) isn't
            # NFS-safe).  Besides, we have an exclusive lock now, so we're
            # guaranteed that no other process will be racing with us.
            if os.path.exists(path):
                counter += 1
                extra = '-%04d' % counter
            else:
                break
    finally:
        # NOTE(review): the lock is dropped before the file is actually
        # created below, so a second process could still pick the same
        # path in between -- confirm whether that window matters.
        lock.unlock()
    # `path' now contains the unique filename for the attachment.  There's
    # just one more step we need to do.  If the part is text/html and
    # ARCHIVE_HTML_SANITIZER is a string (which it must be or we wouldn't be
    # here), then send the attachment through the filter program for
    # sanitization
    if filter_html and ctype == 'text/html':
        base, ext = os.path.splitext(path)
        tmppath = base + '-tmp' + ext
        fp = open(tmppath, 'wb')
        try:
            # Close the temp file even when the write fails, so the handle
            # is not leaked; the outer finally still removes the file.
            try:
                fp.write(decodedpayload)
            finally:
                fp.close()
            cmd = mm_cfg.ARCHIVE_HTML_SANITIZER % {'filename': tmppath}
            progfp = os.popen(cmd, 'r')
            decodedpayload = progfp.read()
            status = progfp.close()
            if status:
                syslog('error',
                       'HTML sanitizer exited with non-zero status: %s',
                       status)
        finally:
            os.unlink(tmppath)
        # BAW: Since we've now sanitized the document, it should be plain
        # text.  Blarg, we really want the sanitizer to tell us what the type
        # of the return data is. :(
        # NOTE(review): uniqueness was only checked for the original
        # extension, not for this .txt name -- an existing base.txt would be
        # overwritten.
        ext = '.txt'
        path = base + '.txt'
    # Is it a message/rfc822 attachment?
    elif ctype == 'message/rfc822':
        submsg = msg.get_payload()
        # BAW: I'm sure we can eventually do better than this. :(
        decodedpayload = Utils.websafe(str(submsg))
    fp = open(path, 'wb')
    try:
        fp.write(decodedpayload)
    finally:
        # Always release the handle, even if the write raised.
        fp.close()
    # Now calculate the url
    baseurl = mlist.GetBaseArchiveURL()
    # Private archives will likely have a trailing slash.  Normalize.
    if baseurl[-1] != '/':
        baseurl += '/'
    # A trailing space in url string may save users who are using
    # RFC-1738 compliant MUA (Not Mozilla).
    # Trailing space will definitely be a problem with format=flowed.
    # Bracket the URL instead.
    url = '<' + baseurl + '%s/%s%s%s>' % (dir, filebase, extra, ext)
    return url
Beispiel #23
0
 lcset = Utils.GetCharSet(mlist.preferred_language)
 if mcset <> lcset:
     try:
         body = unicode(body, mcset).encode(lcset, 'replace')
     except (LookupError, UnicodeError, ValueError):
         pass
 hdrtxt = NL.join(['%s: %s' % (k, v) for k, v in msg.items()])
 hdrtxt = Utils.websafe(hdrtxt)
 # Okay, we've reconstituted the message just fine.  Now for the fun part!
 t = Table(cellspacing=0, cellpadding=0, width='100%')
 t.AddRow([Bold(_('From:')), sender])
 row, col = t.GetCurrentRowIndex(), t.GetCurrentCellIndex()
 t.AddCellInfo(row, col - 1, align='right')
 t.AddRow(
     [Bold(_('Subject:')),
      Utils.websafe(Utils.oneline(subject, lcset))])
 t.AddCellInfo(row + 1, col - 1, align='right')
 t.AddRow([Bold(_('Reason:')), _(reason)])
 t.AddCellInfo(row + 2, col - 1, align='right')
 when = msgdata.get('received_time')
 if when:
     t.AddRow([Bold(_('Received:')), time.ctime(when)])
     t.AddCellInfo(row + 2, col - 1, align='right')
 # We can't use a RadioButtonArray here because horizontal placement can be
 # confusing to the user and vertical placement takes up too much
 # real-estate.  This is a hack!
 buttons = Table(cellspacing="5", cellpadding="0")
 buttons.AddRow(
     map(lambda x, s='&nbsp;' * 5: s + x + s,
         (_('Defer'), _('Approve'), _('Reject'), _('Discard'))))
 buttons.AddRow([
Beispiel #24
0
def get_attachment_fname(mlist, msg):
    """Return the attachment file name of `msg`, encoded as ASCII.

    The (possibly i18n-encoded) name from the part's headers is collapsed
    with Utils.oneline() in the list's preferred-language charset, passed
    through remove_accents() and finally encoded to 'ascii'.  A part
    without a file name is treated as having the empty name.
    """
    list_charset = Utils.GetCharSet(mlist.preferred_language)
    header_name = msg.get_filename('')
    # The one-line name can be either 'str' or unicode at this point.
    flat_name = Utils.oneline(header_name, list_charset)
    unaccented = remove_accents(flat_name)
    return unaccented.encode('ascii')
Beispiel #25
0
def process(mlist, msg, msgdata=None):
    """Scrub attachments out of `msg` and return the (modified) message.

    The function runs in one of two modes, chosen by whether `msgdata` is
    non-empty:

    * in the pipeline (non-empty msgdata): parts are saved to the archive
      with save_attachment() and the resulting URL is recorded on the part
      in the mm_cfg.SCRUBBER_URL_HEADER header; afterwards
      process_signatures() is called and X-Patches-Received /
      X-Sigs-Received headers are added when there is something to report.
    * otherwise (empty or missing msgdata): parts are replaced by a short
      text notice that quotes the URL found in that header ("N/A" if the
      header is absent), and a multipart message is flattened into a single
      text payload.

    Raises DiscardMessage when the outermost part is text/html and
    mm_cfg.ARCHIVE_HTML_SANITIZER is 0.

    NOTE(review): the translated templates below contain %(name)s
    placeholders (url, filename, who, subject, ...) that match local
    variable names; presumably _() interpolates them from this frame, so
    do not rename those locals without confirming.
    """
    sanitize = mm_cfg.ARCHIVE_HTML_SANITIZER
    # `outer' is true only while the first (outermost) part is examined.
    outer = True
    in_pipeline = False
    patches = None
    sigs = None
    if msgdata is None:
        msgdata = {}
    if msgdata:
        # msgdata is available if it is in GLOBAL_PIPELINE
        # ie. not in digest or archiver
        # check if the list owner want to scrub regular delivery
        # Disabled - function split, attachments saved in pipeline,
        # if not mlist.scrub_nondigest:
        #    return
        in_pipeline = True
        patches = []
        sigs = []

    dir = calculate_attachments_dir(mlist, msg, msgdata)
    charset = None
    lcset = Utils.GetCharSet(mlist.preferred_language)
    lcset_out = Charset(lcset).output_charset or lcset
    # Now walk over all subparts of this message and scrub out various types
    format = delsp = None
    for part in msg.walk():
        ctype = part.get_content_type()
        # If the part is text/plain, we leave it alone
        if ctype == "text/plain":
            # We need to choose a charset for the scrubbed message, so we'll
            # arbitrarily pick the charset of the first text/plain part in the
            # message.
            # MAS: Also get the RFC 3676 stuff from this part. This seems to
            # work OK for scrub_nondigest.  It will also work as far as
            # scrubbing messages for the archive is concerned, but pipermail
            # doesn't pay any attention to the RFC 3676 parameters.  The plain
            # format digest is going to be a disaster in any case as some of
            # messages will be format="flowed" and some not.  ToDigest creates
            # its own Content-Type: header for the plain digest which won't
            # have RFC 3676 parameters. If the message Content-Type: headers
            # are retained for display in the digest, the parameters will be
            # there for information, but not for the MUA. This is the best we
            # can do without having get_payload() process the parameters.
            if charset is None:
                charset = part.get_content_charset(lcset)
                format = part.get_param("format")
                delsp = part.get_param("delsp")
            # TK: if part is attached then check charset and scrub if none
            if part.get("content-disposition") and not part.get_content_charset():
                if in_pipeline:
                    # Temporarily relax the umask while the file is created;
                    # the previous mask is always restored.
                    omask = os.umask(002)
                    try:
                        url = save_attachment(mlist, part, dir, patches=patches, sigs=sigs)
                        part[mm_cfg.SCRUBBER_URL_HEADER] = url
                    finally:
                        os.umask(omask)
                else:
                    url = "<" + part.get(mm_cfg.SCRUBBER_URL_HEADER, "N/A") + ">"
                    filename = part.get_filename(_("not available"))
                    filename = Utils.oneline(filename, lcset)
                    replace_payload_by_text(
                        part,
                        _(
                            """\
An embedded and charset-unspecified text was scrubbed...
Name: %(filename)s
URL: %(url)s
"""
                        ),
                        lcset,
                    )
            elif mm_cfg.SCRUBBER_ARCHIVE_ALL_TEXT:
                if in_pipeline:
                    # clearsigned or attached plaintext that will be shown, still archive the copy
                    omask = os.umask(002)
                    try:
                        url = save_attachment(mlist, part, dir, patches=patches, sigs=sigs)
                        part[mm_cfg.SCRUBBER_URL_HEADER] = url
                    finally:
                        os.umask(omask)
                elif outer and not msg.is_multipart():
                    # whole email is only one plaintext. add URL here
                    payload = msg.get_payload(decode=True)
                    del msg["content-type"]
                    del msg["content-transfer-encoding"]
                    payload = "URL: <" + part.get(mm_cfg.SCRUBBER_URL_HEADER, "N/A") + ">\n" + payload
                    msg.set_payload(payload)

        # Integer values of ARCHIVE_HTML_SANITIZER select one of the built-in
        # text/html policies handled below (0, 2, 3, anything else).
        elif ctype == "text/html" and isinstance(sanitize, IntType):
            if sanitize == 0:
                if outer:
                    raise DiscardMessage
                replace_payload_by_text(
                    part,
                    _("HTML attachment scrubbed and removed"),
                    # Adding charset arg and removing content-type
                    # sets content-type to text/plain
                    lcset,
                )
            elif sanitize == 2:
                # By leaving it alone, Pipermail will automatically escape it
                pass
            elif sanitize == 3:
                if in_pipeline:
                    # Pull it out as an attachment but leave it unescaped.  This
                    # is dangerous, but perhaps useful for heavily moderated
                    # lists.
                    omask = os.umask(002)
                    try:
                        url = save_attachment(mlist, part, dir, filter_html=False, patches=patches, sigs=sigs)
                        part[mm_cfg.SCRUBBER_URL_HEADER] = url
                    finally:
                        os.umask(omask)
                else:
                    url = "<" + part.get(mm_cfg.SCRUBBER_URL_HEADER, "N/A") + ">"
                    replace_payload_by_text(
                        part,
                        _(
                            """\
An HTML attachment was scrubbed...
URL: %(url)s
"""
                        ),
                        lcset,
                    )
            else:
                if in_pipeline:
                    # TODO if in_pipeline should preserve original attachment. but no biggie
                    # HTML-escape it and store it as an attachment, but make it
                    # look a /little/ bit prettier. :(
                    payload = Utils.websafe(part.get_payload(decode=True))
                    # For whitespace in the margin, change spaces into
                    # non-breaking spaces, and tabs into 8 of those.  Then use a
                    # mono-space font.  Still looks hideous to me, but then I'd
                    # just as soon discard them.
                    def doreplace(s):
                        return s.expandtabs(8).replace(" ", "&nbsp;")

                    lines = [doreplace(s) for s in payload.split("\n")]
                    payload = "<tt>\n" + BR.join(lines) + "\n</tt>\n"
                    part.set_payload(payload)
                    # We're replacing the payload with the decoded payload so this
                    # will just get in the way.
                    del part["content-transfer-encoding"]
                    omask = os.umask(002)
                    try:
                        url = save_attachment(mlist, part, dir, filter_html=False, patches=patches, sigs=sigs)
                        part[mm_cfg.SCRUBBER_URL_HEADER] = url
                    finally:
                        os.umask(omask)
                else:
                    url = "<" + part.get(mm_cfg.SCRUBBER_URL_HEADER, "N/A") + ">"
                    replace_payload_by_text(
                        part,
                        _(
                            """\
An HTML attachment was scrubbed...
URL: %(url)s
"""
                        ),
                        lcset,
                    )
        elif ctype == "message/rfc822":
            if in_pipeline:
                omask = os.umask(002)
                try:
                    # NOTE(review): unlike the other branches, patches/sigs
                    # are not passed here -- confirm this is intentional.
                    url = save_attachment(mlist, part, dir)
                    part[mm_cfg.SCRUBBER_URL_HEADER] = url
                finally:
                    os.umask(omask)
            else:
                # This part contains a submessage, so it too needs scrubbing
                submsg = part.get_payload(0)
                url = "<" + part.get(mm_cfg.SCRUBBER_URL_HEADER, "N/A") + ">"
                subject = submsg.get("subject", _("no subject"))
                subject = Utils.oneline(subject, lcset)
                date = submsg.get("date", _("no date"))
                who = submsg.get("from", _("unknown sender"))
                size = len(str(submsg))
                replace_payload_by_text(
                    part,
                    _(
                        """\
An embedded message was scrubbed...
From: %(who)s
Subject: %(subject)s
Date: %(date)s
Size: %(size)s
URL: %(url)s
"""
                    ),
                    lcset,
                )
        # If the message isn't a multipart, then we'll strip it out as an
        # attachment that would have to be separately downloaded.  Pipermail
        # will transform the url into a hyperlink.
        elif part.get_payload() and not part.is_multipart():
            payload = part.get_payload(decode=True)
            # XXX Under email 2.5, it is possible that payload will be None.
            # This can happen when you have a Content-Type: multipart/* with
            # only one part and that part has two blank lines between the
            # first boundary and the end boundary.  In email 3.0 you end up
            # with a string in the payload.  I think in this case it's safe to
            # ignore the part.
            # Note: this `continue' skips the `outer = False' at the bottom
            # of the loop body.
            if payload is None or payload.strip() == "":
                continue
            if in_pipeline:
                omask = os.umask(002)
                try:
                    url = save_attachment(mlist, part, dir, patches=patches, sigs=sigs)
                    part[mm_cfg.SCRUBBER_URL_HEADER] = url
                finally:
                    os.umask(omask)
            else:
                ctype = part.get_content_type()
                size = len(payload)
                url = "<" + part.get(mm_cfg.SCRUBBER_URL_HEADER, "N/A") + ">"
                desc = part.get("content-description", _("not available"))
                desc = Utils.oneline(desc, lcset)
                filename = part.get_filename(_("not available"))
                filename = Utils.oneline(filename, lcset)
                replace_payload_by_text(
                    part,
                    _(
                        """\
A non-text attachment was scrubbed...
Name: %(filename)s
Type: %(ctype)s
Size: %(size)d bytes
Desc: %(desc)s
URL: %(url)s
"""
                    ),
                    lcset,
                )
        outer = False
    # We still have to sanitize multipart messages to flat text because
    # Pipermail can't handle messages with list payloads.  This is a kludge;
    # def (n) clever hack ;).
    if msg.is_multipart() and not in_pipeline:
        # By default we take the charset of the first text/plain part in the
        # message, but if there was none, we'll use the list's preferred
        # language's charset.
        if not charset or charset == "us-ascii":
            charset = lcset_out
        else:
            # normalize to the output charset if input/output are different
            charset = Charset(charset).output_charset or charset
        # We now want to concatenate all the parts which have been scrubbed to
        # text/plain, into a single text/plain payload.  We need to make sure
        # all the characters in the concatenated string are in the same
        # encoding, so we'll use the 'replace' key in the coercion call.
        # BAW: Martin's original patch suggested we might want to try
        # generalizing to utf-8, and that's probably a good idea (eventually).
        text = []
        for part in msg.walk():
            # TK: bug-id 1099138 and multipart
            # MAS test payload - if part may fail if there are no headers.
            if not part.get_payload() or part.is_multipart():
                continue
            # All parts should be scrubbed to text/plain by now, except
            # if sanitize == 2, there could be text/html parts so keep them
            # but skip any other parts.
            partctype = part.get_content_type()
            if partctype <> "text/plain" and (partctype <> "text/html" or sanitize <> 2):
                text.append(_("Skipped content of type %(partctype)s\n"))
                continue
            try:
                t = part.get_payload(decode=True) or ""
            # MAS: TypeError exception can occur if payload is None. This
            # was observed with a message that contained an attached
            # message/delivery-status part. Because of the special parsing
            # of this type, this resulted in a text/plain sub-part with a
            # null body. See bug 1430236.
            except (binascii.Error, TypeError):
                t = part.get_payload() or ""
            # TK: get_content_charset() returns 'iso-2022-jp' for internally
            # crafted (scrubbed) 'euc-jp' text part. So, first try
            # get_charset(), then get_content_charset() for the parts
            # which are already embedded in the incoming message.
            partcharset = part.get_charset()
            if partcharset:
                partcharset = str(partcharset)
            else:
                partcharset = part.get_content_charset()
            if partcharset and partcharset <> charset:
                try:
                    t = unicode(t, partcharset, "replace")
                except (UnicodeError, LookupError, ValueError, AssertionError):
                    # We can get here if partcharset is bogus in some way.
                    # Replace funny characters.  We use errors='replace'
                    t = unicode(t, "ascii", "replace")
                try:
                    # Should use HTML-Escape, or try generalizing to UTF-8
                    t = t.encode(charset, "replace")
                except (UnicodeError, LookupError, ValueError, AssertionError):
                    # if the message charset is bogus, use the list's.
                    t = t.encode(lcset, "replace")
            # Separation is useful
            if isinstance(t, StringType):
                # omit empty parts
                if t.strip(" \t\r\n") != "":
                    if not t.endswith("\n"):
                        t += "\n"
                    if mm_cfg.SCRUBBER_ARCHIVE_ALL_TEXT:
                        # Add link to archived part if it wasn't archived already
                        url = "URL: <" + part.get(mm_cfg.SCRUBBER_URL_HEADER, "N/A") + ">\n"
                        if not t.endswith(url):
                            t = url + t
                            filename = part.get_filename()
                            if filename:
                                filename = Utils.oneline(filename, lcset)
                                t = "Name: " + filename + "\n" + t

                    text.append(t)
        # Now join the text and set the payload
        sep = _("-------------- next part --------------\n")
        # The i18n separator is in the list's charset. Coerce it to the
        # message charset.
        try:
            s = unicode(sep, lcset, "replace")
            sep = s.encode(charset, "replace")
        except (UnicodeError, LookupError, ValueError, AssertionError):
            # Best effort: keep the separator as-is if it cannot be recoded.
            pass
        replace_payload_by_text(msg, sep.join(text), charset)
        # Carry over the RFC 3676 parameters taken from the first text/plain
        # part, if any.
        if format:
            msg.set_param("Format", format)
        if delsp:
            msg.set_param("DelSp", delsp)
    if in_pipeline:
        (patches, sigs) = process_signatures(mlist, patches, sigs)
        if patches:
            # X-Patches-Received: PatchID1=filename; PatchID2=filename
            processed = {}
            for p in patches:
                processed[p["id"]] = p["file"]
            msg.add_header("X-Patches-Received", None, **processed)
        if sigs:
            # output header in RFC compliant way, if multiple sigs with multiple keys per one patch, then delimited by dot, like:
            # X-Sigs-Received: PatchID1=KeyID1; PatchID2=KeyID2.KeyID3
            processed = {}
            for s in sigs:
                processed[s["phash"]] = processed.get(s["phash"], []) + [s["key"]]
            for k in processed.keys():
                # dot does not need escaping
                processed[k] = ".".join(processed[k])
            msg.add_header("X-Sigs-Received", None, **processed)
    return msg
   def __handlepost(self, record, value, comment, preserve, forward, addr):
       """Apply a moderator's decision to one held posting.

       Arguments:
           record -- the held-message tuple (ptime, sender, subject, reason,
                     filename, msgdata); filename is relative to
                     mm_cfg.DATA_DIR.
           value -- the chosen action: mm_cfg.DEFER, mm_cfg.APPROVE,
                    mm_cfg.REJECT or mm_cfg.DISCARD.
           comment -- reason sent with a rejection and added to the log;
                      may be empty.
           preserve -- when true, first save a plain-text copy of the
                       message as a .msg file in mm_cfg.SPAM_DIR.
           forward, addr -- when both are true, a copy of the message is
                       forwarded to addr (possibly a comma separated list).

       Returns DEFER when the posting was deferred, LOST when the message
       file turned out to be missing, and REMOVE otherwise.  Raises
       Errors.LostHeldMessage if the file is missing when forwarding; any
       IOError/OSError other than ENOENT is re-raised.
       """
       # For backwards compatibility with pre 2.0beta3
       ptime, sender, subject, reason, filename, msgdata = record
       path = os.path.join(mm_cfg.DATA_DIR, filename)
       # Handle message preservation
       if preserve:
           parts = os.path.split(path)[1].split(DASH)
           parts[0] = 'spam'
           spamfile = DASH.join(parts)
           # Preserve the message as plain text, not as a pickle
           try:
               fp = open(path)
           except IOError as e:
               if e.errno != errno.ENOENT: raise
               return LOST
           try:
               if path.endswith('.pck'):
                   # NOTE(review): unpickling is only safe because this is
                   # presumably a file written locally by the list itself --
                   # never point this at untrusted data.
                   msg = pickle.load(fp)
               else:
                   assert path.endswith('.txt'), '%s not .pck or .txt' % path
                   msg = fp.read()
           finally:
               fp.close()
           # Save the plain text to a .msg file, not a .pck file
           outpath = os.path.join(mm_cfg.SPAM_DIR, spamfile)
           head, ext = os.path.splitext(outpath)
           outpath = head + '.msg'
           outfp = open(outpath, 'wb')
           try:
               if path.endswith('.pck'):
                   g = Generator(outfp)
                   g.flatten(msg, 1)
               else:
                   outfp.write(msg)
           finally:
               outfp.close()
       # Now handle updates to the database
       rejection = None
       msg = None
       status = REMOVE
       if value == mm_cfg.DEFER:
           # Defer
           status = DEFER
       elif value == mm_cfg.APPROVE:
           # Approved.  Read the message exactly once, inside the guard that
           # turns a vanished file into LOST.
           try:
               msg = readMessage(path)
           except IOError as e:
               if e.errno != errno.ENOENT: raise
               return LOST
           msgdata['approved'] = 1
           # adminapproved is used by the Emergency handler
           msgdata['adminapproved'] = 1
           # Calculate a new filebase for the approved message, otherwise
           # delivery errors will cause duplicates.
           try:
               del msgdata['filebase']
           except KeyError:
               pass
           # Queue the file for delivery by qrunner.  Trying to deliver the
           # message directly here can lead to a huge delay in web
           # turnaround.  Log the moderation and add a header.
           msg['X-Mailman-Approved-At'] = email.utils.formatdate(localtime=1)
           syslog('vette', '%s: held message approved, message-id: %s',
                  self.internal_name(), msg.get('message-id', 'n/a'))
           # Stick the message back in the incoming queue for further
           # processing.
           inq = get_switchboard(mm_cfg.INQUEUE_DIR)
           inq.enqueue(msg, _metadata=msgdata)
       elif value == mm_cfg.REJECT:
           # Rejected
           rejection = 'Refused'
           lang = self.getMemberLanguage(sender)
           # Keep the local name `subject': the translated template below
           # refers to it as %(subject)s.
           subject = Utils.oneline(subject, Utils.GetCharSet(lang))
           self.__refuse(_('Posting of your message titled "%(subject)s"'),
                         sender,
                         comment or _('[No reason given]'),
                         lang=lang)
       else:
           assert value == mm_cfg.DISCARD
           # Discarded
           rejection = 'Discarded'
       # Forward the message
       if forward and addr:
           # If we've approved the message, we need to be sure to craft a
           # completely unique second message for the forwarding operation,
           # since we don't want to share any state or information with the
           # normal delivery.
           try:
               copy = readMessage(path)
           except IOError as e:
               if e.errno != errno.ENOENT: raise
               raise Errors.LostHeldMessage(path)
           # It's possible the addr is a comma separated list of addresses.
           addrs = getaddresses([addr])
           if len(addrs) == 1:
               realname, addr = addrs[0]
               # If the address getting the forwarded message is a member of
               # the list, we want the headers of the outer message to be
               # encoded in their language.  Otherwise it'll be the preferred
               # language of the mailing list.
               lang = self.getMemberLanguage(addr)
           else:
               # Throw away the realnames
               addr = [a for realname, a in addrs]
               # Which member language do we attempt to use?  We could use
               # the first match or the first address, but in the face of
               # ambiguity, let's just use the list's preferred language
               lang = self.preferred_language
           # Build the notification under the recipient's language, then
           # always restore the previous translation.
           otrans = i18n.get_translation()
           i18n.set_language(lang)
           try:
               fmsg = Message.UserNotification(
                   addr,
                   self.GetBouncesEmail(),
                   _('Forward of moderated message'),
                   lang=lang)
           finally:
               i18n.set_translation(otrans)
           fmsg.set_type('message/rfc822')
           fmsg.attach(copy)
           fmsg.send(self)
       # Log the rejection
       if rejection:
           # Each detail line is tab-indented, matching the Reason line that
           # may be appended below.  Percent signs are doubled before the
           # text is handed to syslog().
           note = ('%(listname)s: %(rejection)s posting:\n'
                   '\tFrom: %(sender)s\n'
                   '\tSubject: %(subject)s') % {
               'listname': self.internal_name(),
               'rejection': rejection,
               'sender': str(sender).replace('%', '%%'),
               'subject': str(subject).replace('%', '%%'),
           }
           if comment:
               note += '\n\tReason: ' + comment.replace('%', '%%')
           syslog('vette', note)
       # Always unlink the file containing the message text.  It's not
       # necessary anymore, regardless of the disposition of the message.
       if status != DEFER:
           try:
               os.unlink(path)
           except OSError as e:
               if e.errno != errno.ENOENT: raise
               # We lost the message text file.  Clean up our housekeeping
               # and inform of this status.
               return LOST
       return status
Beispiel #27
0
def show_helds_overview(mlist, form):
    """Add a per-sender overview of the held messages to `form`.

    For every sender returned by helds_by_sender(mlist) a bordered table is
    appended to `form`.  Its left half holds the bulk-action controls for
    all of that sender's held messages (defer/accept/reject/discard,
    preserve, forward, and -- depending on membership -- the moderation
    flag, sender filter and ban options).  Its right half lists each held
    message with its subject, size, hold reason and, when recorded, the
    time it was received.

    Requests whose message file no longer exists are discarded through
    mlist.HandleRequest() and left out of the listing.

    Returns 0 when there are no held messages; otherwise populates `form`
    and returns None.
    """
    # Sort the held messages by sender
    bysender = helds_by_sender(mlist)
    if not bysender:
        return 0
    form.AddItem('<hr>')
    form.AddItem(Center(Header(2, _('Held Messages'))))
    # Add the by-sender overview tables
    admindburl = mlist.GetScriptURL('admindb', absolute=1)
    table = Table(border=0)
    form.AddItem(table)
    # sorted() gives the same ordered list of keys on Python 2 and 3
    # (dict.keys() has no sort() method on Python 3).
    senders = sorted(bysender)
    for sender in senders:
        # qsender is the URL-quoted form used in form field names and query
        # strings; esender is the HTML-escaped form used for display.  Both
        # names are interpolated into the translated strings below.
        qsender = quote_plus(sender)
        esender = Utils.websafe(sender)
        senderurl = admindburl + '?sender=' + qsender
        # The encompassing sender table
        stable = Table(border=1)
        stable.AddRow([Center(Bold(_('From:')).Format() + esender)])
        stable.AddCellInfo(stable.GetCurrentRowIndex(), 0, colspan=2)
        left = Table(border=0)
        left.AddRow([_('Action to take on all these held messages:')])
        left.AddCellInfo(left.GetCurrentRowIndex(), 0, colspan=2)
        btns = hacky_radio_buttons(
            'senderaction-' + qsender,
            (_('Defer'), _('Accept'), _('Reject'), _('Discard')),
            (mm_cfg.DEFER, mm_cfg.APPROVE, mm_cfg.REJECT, mm_cfg.DISCARD),
            (1, 0, 0, 0))
        left.AddRow([btns])
        left.AddCellInfo(left.GetCurrentRowIndex(), 0, colspan=2)
        left.AddRow([
            CheckBox('senderpreserve-' + qsender, 1).Format() +
            '&nbsp;' +
            _('Preserve messages for the site administrator')
            ])
        left.AddCellInfo(left.GetCurrentRowIndex(), 0, colspan=2)
        left.AddRow([
            CheckBox('senderforward-' + qsender, 1).Format() +
            '&nbsp;' +
            _('Forward messages (individually) to:')
            ])
        left.AddCellInfo(left.GetCurrentRowIndex(), 0, colspan=2)
        left.AddRow([
            TextBox('senderforwardto-' + qsender,
                    value=mlist.GetOwnerEmail())
            ])
        left.AddCellInfo(left.GetCurrentRowIndex(), 0, colspan=2)
        # If the sender is a member and the message is being held due to a
        # moderation bit, give the admin a chance to clear the member's mod
        # bit.  If this sender is not a member and is not already on one of
        # the sender filters, then give the admin a chance to add this sender
        # to one of the filters.
        if mlist.isMember(sender):
            if mlist.getMemberOption(sender, mm_cfg.Moderate):
                left.AddRow([
                    CheckBox('senderclearmodp-' + qsender, 1).Format() +
                    '&nbsp;' +
                    _("Clear this member's <em>moderate</em> flag")
                    ])
            else:
                left.AddRow(
                    [_('<em>The sender is now a member of this list</em>')])
            left.AddCellInfo(left.GetCurrentRowIndex(), 0, colspan=2)
        elif sender not in (mlist.accept_these_nonmembers +
                            mlist.hold_these_nonmembers +
                            mlist.reject_these_nonmembers +
                            mlist.discard_these_nonmembers):
            left.AddRow([
                CheckBox('senderfilterp-' + qsender, 1).Format() +
                '&nbsp;' +
                _('Add <b>%(esender)s</b> to one of these sender filters:')
                ])
            left.AddCellInfo(left.GetCurrentRowIndex(), 0, colspan=2)
            btns = hacky_radio_buttons(
                'senderfilter-' + qsender,
                (_('Accepts'), _('Holds'), _('Rejects'), _('Discards')),
                (mm_cfg.ACCEPT, mm_cfg.HOLD, mm_cfg.REJECT, mm_cfg.DISCARD),
                (0, 0, 0, 1))
            left.AddRow([btns])
            left.AddCellInfo(left.GetCurrentRowIndex(), 0, colspan=2)
            if sender not in mlist.ban_list:
                left.AddRow([
                    CheckBox('senderbanp-' + qsender, 1).Format() +
                    '&nbsp;' +
                    _("""Ban <b>%(esender)s</b> from ever subscribing to this
                    mailing list""")])
                left.AddCellInfo(left.GetCurrentRowIndex(), 0, colspan=2)
        right = Table(border=0)
        right.AddRow([
            _("""Click on the message number to view the individual
            message, or you can """) +
            Link(senderurl, _('view all messages from %(esender)s')).Format()
            ])
        right.AddCellInfo(right.GetCurrentRowIndex(), 0, colspan=2)
        right.AddRow(['&nbsp;', '&nbsp;'])
        counter = 1
        for id in bysender[sender]:
            info = mlist.GetRecord(id)
            # Note: this rebinds `sender` to the value stored in the request
            # record.  The iterable above was already evaluated and the outer
            # loop assigns `sender` afresh on its next pass, so the iteration
            # itself is unaffected.
            ptime, sender, subject, reason, filename, msgdata = info
            # BAW: This is really the size of the message pickle, which should
            # be close, but won't be exact.  Sigh, good enough.
            try:
                size = os.path.getsize(os.path.join(mm_cfg.DATA_DIR, filename))
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise
                # This message must have gotten lost, i.e. it's already been
                # handled by the time we got here.
                mlist.HandleRequest(id, mm_cfg.DISCARD)
                continue
            dispsubj = Utils.oneline(
                subject, Utils.GetCharSet(mlist.preferred_language))
            t = Table(border=0)
            t.AddRow([Link(admindburl + '?msgid=%d' % id, '[%d]' % counter),
                      Bold(_('Subject:')),
                      Utils.websafe(dispsubj)
                      ])
            t.AddRow(['&nbsp;', Bold(_('Size:')), str(size) + _(' bytes')])
            if reason:
                reason = _(reason)
            else:
                reason = _('not available')
            t.AddRow(['&nbsp;', Bold(_('Reason:')), reason])
            # Include the date we received the message, if available
            when = msgdata.get('received_time')
            if when:
                t.AddRow(['&nbsp;', Bold(_('Received:')),
                          time.ctime(when)])
            # Hidden field named after the quoted sender, carrying this
            # request's id.
            t.AddRow([InputObj(qsender, 'hidden', str(id), False).Format()])
            counter += 1
            right.AddRow([t])
        stable.AddRow([left, right])
        table.AddRow([stable])
def process(mlist, msg, msgdata=None):
    """Scrub attachments out of `msg` and flatten it to plain text.

    Every MIME part of `msg` is visited:

    * text/plain parts are left alone, unless they carry a
      Content-Disposition header but no charset, in which case they are
      saved as attachments;
    * text/html parts are handled according to
      mm_cfg.ARCHIVE_HTML_SANITIZER when that setting is an integer
      (0: remove, 2: leave untouched, 3: save unescaped, anything else:
      HTML-escape and save);
    * message/rfc822 parts and all remaining non-multipart parts are saved
      with save_attachment().

    A saved part has its payload replaced by a short text notice that
    contains the attachment's URL.  If `msg` is multipart, the surviving
    text parts are then concatenated into a single text payload in one
    charset.

    Arguments:
      mlist   -- the mailing list the message belongs to.
      msg     -- the message to scrub; it is modified in place.
      msgdata -- message metadata.  When it is non-empty and
                 mlist.scrub_nondigest is false, nothing is scrubbed.

    Returns `msg`, or None when scrubbing was skipped because of
    mlist.scrub_nondigest.

    Raises DiscardMessage when the first part yielded by msg.walk() is
    text/html and ARCHIVE_HTML_SANITIZER is 0.
    """
    sanitize = mm_cfg.ARCHIVE_HTML_SANITIZER
    outer = True
    if msgdata is None:
        msgdata = {}
    if msgdata:
        # msgdata is available if it is in GLOBAL_PIPELINE
        # ie. not in digest or archiver
        # check if the list owner want to scrub regular delivery
        if not mlist.scrub_nondigest:
            return
    dir = calculate_attachments_dir(mlist, msg, msgdata)
    charset = None
    lcset = Utils.GetCharSet(mlist.preferred_language)
    lcset_out = Charset(lcset).output_charset or lcset
    # Now walk over all subparts of this message and scrub out various types
    format = delsp = None
    for part in msg.walk():
        ctype = part.get_content_type()
        # If the part is text/plain, we leave it alone
        if ctype == 'text/plain':
            # We need to choose a charset for the scrubbed message, so we'll
            # arbitrarily pick the charset of the first text/plain part in the
            # message.
            # MAS: Also get the RFC 3676 stuff from this part. This seems to
            # work OK for scrub_nondigest.  It will also work as far as
            # scrubbing messages for the archive is concerned, but pipermail
            # doesn't pay any attention to the RFC 3676 parameters.  The plain
            # format digest is going to be a disaster in any case as some of
            # messages will be format="flowed" and some not.  ToDigest creates
            # its own Content-Type: header for the plain digest which won't
            # have RFC 3676 parameters. If the message Content-Type: headers
            # are retained for display in the digest, the parameters will be
            # there for information, but not for the MUA. This is the best we
            # can do without having get_payload() process the parameters.
            if charset is None:
                charset = part.get_content_charset(lcset)
                format = part.get_param('format')
                delsp = part.get_param('delsp')
            # TK: if part is attached then check charset and scrub if none
            if part.get('content-disposition') and \
               not part.get_content_charset():
                omask = os.umask(0o02)
                try:
                    url = save_attachment(mlist, part, dir)
                finally:
                    os.umask(omask)
                filename = part.get_filename(_('not available'))
                filename = Utils.oneline(filename, lcset)
                replace_payload_by_text(
                    part,
                    _("""\
An embedded and charset-unspecified text was scrubbed...
Name: %(filename)s
URL: %(url)s
"""), lcset)
        elif ctype == 'text/html' and isinstance(sanitize, int):
            if sanitize == 0:
                if outer:
                    raise DiscardMessage
                replace_payload_by_text(
                    part,
                    _('HTML attachment scrubbed and removed'),
                    # Adding charset arg and removing content-type
                    # sets content-type to text/plain
                    lcset)
            elif sanitize == 2:
                # By leaving it alone, Pipermail will automatically escape it
                pass
            elif sanitize == 3:
                # Pull it out as an attachment but leave it unescaped.  This
                # is dangerous, but perhaps useful for heavily moderated
                # lists.
                omask = os.umask(0o02)
                try:
                    url = save_attachment(mlist, part, dir, filter_html=False)
                finally:
                    os.umask(omask)
                replace_payload_by_text(
                    part,
                    _("""\
An HTML attachment was scrubbed...
URL: %(url)s
"""), lcset)
            else:
                # HTML-escape it and store it as an attachment, but make it
                # look a /little/ bit prettier. :(
                payload = Utils.websafe(part.get_payload(decode=True))

                # For whitespace in the margin, change spaces into
                # non-breaking spaces, and tabs into 8 of those.  Then use a
                # mono-space font.  Still looks hideous to me, but then I'd
                # just as soon discard them.
                def doreplace(s):
                    return s.expandtabs(8).replace(' ', '&nbsp;')

                lines = [doreplace(s) for s in payload.split('\n')]
                payload = '<tt>\n' + BR.join(lines) + '\n</tt>\n'
                part.set_payload(payload)
                # We're replacing the payload with the decoded payload so this
                # will just get in the way.
                del part['content-transfer-encoding']
                omask = os.umask(0o02)
                try:
                    url = save_attachment(mlist, part, dir, filter_html=False)
                finally:
                    os.umask(omask)
                replace_payload_by_text(
                    part,
                    _("""\
An HTML attachment was scrubbed...
URL: %(url)s
"""), lcset)
        elif ctype == 'message/rfc822':
            # This part contains a submessage, so it too needs scrubbing
            submsg = part.get_payload(0)
            omask = os.umask(0o02)
            try:
                url = save_attachment(mlist, part, dir)
            finally:
                os.umask(omask)
            subject = submsg.get('subject', _('no subject'))
            subject = Utils.oneline(subject, lcset)
            date = submsg.get('date', _('no date'))
            who = submsg.get('from', _('unknown sender'))
            # Like the subject, the From: header may be RFC 2047 encoded, so
            # collapse it to a single line in the list's charset as well.
            who = Utils.oneline(who, lcset)
            size = len(str(submsg))
            replace_payload_by_text(
                part,
                _("""\
An embedded message was scrubbed...
From: %(who)s
Subject: %(subject)s
Date: %(date)s
Size: %(size)s
URL: %(url)s
"""), lcset)
        # If the message isn't a multipart, then we'll strip it out as an
        # attachment that would have to be separately downloaded.  Pipermail
        # will transform the url into a hyperlink.
        elif part.get_payload() and not part.is_multipart():
            payload = part.get_payload(decode=True)
            ctype = part.get_content_type()
            # XXX Under email 2.5, it is possible that payload will be None.
            # This can happen when you have a Content-Type: multipart/* with
            # only one part and that part has two blank lines between the
            # first boundary and the end boundary.  In email 3.0 you end up
            # with a string in the payload.  I think in this case it's safe to
            # ignore the part.
            if payload is None:
                continue
            size = len(payload)
            omask = os.umask(0o02)
            try:
                url = save_attachment(mlist, part, dir)
            finally:
                os.umask(omask)
            desc = part.get('content-description', _('not available'))
            desc = Utils.oneline(desc, lcset)
            filename = part.get_filename(_('not available'))
            filename = Utils.oneline(filename, lcset)
            replace_payload_by_text(
                part,
                _("""\
A non-text attachment was scrubbed...
Name: %(filename)s
Type: %(ctype)s
Size: %(size)d bytes
Desc: %(desc)s
URL: %(url)s
"""), lcset)
        outer = False
    # We still have to sanitize multipart messages to flat text because
    # Pipermail can't handle messages with list payloads.  This is a kludge;
    # def (n) clever hack ;).
    if msg.is_multipart():
        # By default we take the charset of the first text/plain part in the
        # message, but if there was none, we'll use the list's preferred
        # language's charset.
        if not charset or charset == 'us-ascii':
            charset = lcset_out
        else:
            # normalize to the output charset if input/output are different
            charset = Charset(charset).output_charset or charset
        # We now want to concatenate all the parts which have been scrubbed to
        # text/plain, into a single text/plain payload.  Each part is first
        # decoded to text using its own charset; the joined result is then
        # coerced to the message charset with errors='replace' so that it
        # contains only characters that charset can represent.
        # BAW: Martin's original patch suggested we might want to try
        # generalizing to utf-8, and that's probably a good idea (eventually).
        text = []
        for part in msg.walk():
            # TK: bug-id 1099138 and multipart
            # MAS test payload - if part may fail if there are no headers.
            if not part.get_payload() or part.is_multipart():
                continue
            # All parts should be scrubbed to text/plain by now, except
            # if sanitize == 2, there could be text/html parts so keep them
            # but skip any other parts.
            partctype = part.get_content_type()
            if partctype != 'text/plain' and (partctype != 'text/html'
                                              or sanitize != 2):
                text.append(_('Skipped content of type %(partctype)s\n'))
                continue
            try:
                t = part.get_payload(decode=True) or ''
            # MAS: TypeError exception can occur if payload is None. This
            # was observed with a message that contained an attached
            # message/delivery-status part. Because of the special parsing
            # of this type, this resulted in a text/plain sub-part with a
            # null body. See bug 1430236.
            except (binascii.Error, TypeError):
                t = part.get_payload() or ''
            # TK: get_content_charset() returns 'iso-2022-jp' for internally
            # crafted (scrubbed) 'euc-jp' text part. So, first try
            # get_charset(), then get_content_charset() for the parts
            # which are already embedded in the incoming message.
            partcharset = part.get_charset()
            if partcharset:
                partcharset = str(partcharset)
            else:
                partcharset = part.get_content_charset()
            if isinstance(t, bytes):
                # A decoded payload is a byte string.  Turn it into text
                # using the part's own charset, or the message charset when
                # the part does not declare one.
                try:
                    t = t.decode(partcharset or charset, 'replace')
                except (UnicodeError, LookupError, ValueError, AssertionError):
                    # We can get here if partcharset is bogus in some way.
                    # Replace funny characters.  We use errors='replace'
                    t = t.decode('ascii', 'replace')
            # Separation is useful
            if isinstance(t, str):
                if not t.endswith('\n'):
                    t += '\n'
                text.append(t)
        # Now join the text and set the payload
        sep = _('-------------- next part --------------\n')
        if isinstance(sep, bytes):
            # The i18n separator is in the list's charset.
            sep = sep.decode(lcset, 'replace')
        body = sep.join(text)
        try:
            # Should use HTML-Escape, or try generalizing to UTF-8
            body = body.encode(charset, 'replace').decode(charset, 'replace')
        except (UnicodeError, LookupError, ValueError, AssertionError):
            # if the message charset is bogus, use the list's.
            body = body.encode(lcset, 'replace').decode(lcset, 'replace')
        replace_payload_by_text(msg, body, charset)
        if format:
            msg.set_param('Format', format)
        if delsp:
            msg.set_param('DelSp', delsp)
    return msg
Beispiel #29
0
         pass
     # Queue the file for delivery by qrunner.  Trying to deliver the
     # message directly here can lead to a huge delay in web
     # turnaround.  Log the moderation and add a header.
     msg['X-Mailman-Approved-At'] = email.Utils.formatdate(localtime=1)
     syslog('vette', 'held message approved, message-id: %s',
            msg.get('message-id', 'n/a'))
     # Stick the message back in the incoming queue for further
     # processing.
     inq = get_switchboard(mm_cfg.INQUEUE_DIR)
     inq.enqueue(msg, _metadata=msgdata)
 elif value == mm_cfg.REJECT:
     # Rejected
     rejection = 'Refused'
     lang = self.getMemberLanguage(sender)
     subject = Utils.oneline(subject, Utils.GetCharSet(lang))
     self.__refuse(_('Posting of your message titled "%(subject)s"'),
                   sender, comment or _('[No reason given]'),
                   lang=lang)
 else:
     assert value == mm_cfg.DISCARD
     # Discarded
     rejection = 'Discarded'
 # Forward the message
 if forward and addr:
     # If we've approved the message, we need to be sure to craft a
     # completely unique second message for the forwarding operation,
     # since we don't want to share any state or information with the
     # normal delivery.
     try:
         copy = readMessage(path)
def hold_for_approval(mlist, msg, msgdata, exc):
    """Hold `msg` for moderation, send the notices, then raise `exc`.

    The message is stored with mlist.HoldMessage() and a pending
    confirmation cookie is created for it.  The poster receives a
    "postheld.txt" notice unless the message came from Usenet or
    auto-responses are suppressed for it.  When mlist.admin_immed_notify is
    set, the owners and moderators receive a multipart notice containing
    the "postauth.txt" text, the held message and a pre-made "confirm"
    reply.  The hold is logged to the 'vette' log.

    Arguments:
      mlist   -- the mailing list holding the message.
      msg     -- the message being held.
      msgdata -- message metadata; 'rejection_notice' is stored into it.
      exc     -- the hold exception, either an instance or a class (a class
                 is instantiated without arguments).

    This function never returns normally: it always ends by raising `exc`.
    """
    # BAW: This should really be tied into the email confirmation system so
    # that the message can be approved or denied via email as well as the
    # web.
    #
    # The caller may hand us the exception class rather than an instance;
    # go ahead and instantiate it now.
    if isinstance(exc, type):
        exc = exc()
    listname = mlist.real_name
    sender = msgdata.get('sender', msg.get_sender())
    usersubject = msg.get('subject')
    charset = Utils.GetCharSet(mlist.preferred_language)
    usersubject = (Utils.oneline(usersubject, charset) if usersubject
                   else _('(no subject)'))
    message_id = msg.get('message-id', 'n/a')
    owneraddr = mlist.GetOwnerEmail()
    adminaddr = mlist.GetBouncesEmail()
    requestaddr = mlist.GetRequestEmail()
    # Both the reason and the rejection notice go through the translator
    # again further down, so keep the untranslated, wrapped reason around.
    reason = Utils.wrap(exc.reason_notice())
    if isinstance(exc, NonMemberPost) and mlist.nonmember_rejection_notice:
        # The list's own notice for non-members wins; fill in the owner
        # address placeholder by hand.
        notice = mlist.nonmember_rejection_notice.replace('%(listowner)s',
                                                          owneraddr)
    else:
        notice = exc.rejection_notice(mlist)
    msgdata['rejection_notice'] = Utils.wrap(notice)
    id = mlist.HoldMessage(msg, reason, msgdata)
    # Template substitutions shared by the poster and the admin notices.
    fields = dict(
        listname=listname,
        hostname=mlist.host_name,
        reason=_(reason),
        sender=sender,
        subject=usersubject,
        admindb_url=mlist.GetScriptURL('admindb', absolute=1),
    )
    fromusenet = msgdata.get('fromusenet')
    # The confirmation cookie is needed by both notices.
    cookie = mlist.pend_new(Pending.HELD_MESSAGE, id)
    # Two messages may go out, possibly in different languages (the
    # poster's and the list's).  The current language context ought to be
    # the poster's, so the poster's notice is crafted first.
    notify_poster = (
        not fromusenet
        and ackp(msg)
        and mlist.respond_to_post_requests
        and mlist.autorespondToSender(sender,
                                      mlist.getMemberLanguage(sender)))
    if notify_poster:
        confirm_base = mlist.GetScriptURL('confirm', absolute=1)
        fields['confirmurl'] = '%s/%s' % (confirm_base, cookie)
        lang = msgdata.get('lang', mlist.getMemberLanguage(sender))
        subject = _('Your message to %(listname)s awaits moderator approval')
        held_text = Utils.maketext('postheld.txt', fields,
                                   lang=lang, mlist=mlist)
        poster_notice = Message.UserNotification(
            sender, owneraddr, subject, held_text, lang)
        poster_notice.send(mlist)
    # Next the notice for the list owners and moderators.  It appears to
    # come from <list>-owner since no bounce processing is needed for it.
    if mlist.admin_immed_notify:
        # Switch the language context to the list's for the duration.
        saved_translation = i18n.get_translation()
        i18n.set_language(mlist.preferred_language)
        try:
            lang = mlist.preferred_language
            charset = Utils.GetCharSet(lang)
            # Re-translate the values that depend on the language context.
            fields.update(reason=_(reason), subject=usersubject)
            subject = _('%(listname)s post from %(sender)s requires approval')
            admin_notice = Message.UserNotification(
                owneraddr, owneraddr, subject, lang=lang)
            admin_notice.set_type('multipart/mixed')
            auth_text = Utils.maketext('postauth.txt', fields,
                                       raw=1, mlist=mlist)
            auth_part = MIMEText(auth_text, _charset=charset)
            instructions = Utils.wrap(
                _("""\
If you reply to this message, keeping the Subject: header intact, Mailman will
discard the held message.  Do this if the message is spam.  If you reply to
this message and include an Approved: header with the list password in it, the
message will be approved for posting to the list.  The Approved: header can
also appear in the first line of the body of the reply."""))
            # A ready-made message the admin can reply to in order to act
            # on the held posting by email.
            confirm_part = MIMEText(instructions,
                                    _charset=Utils.GetCharSet(lang))
            confirm_part['Subject'] = 'confirm ' + cookie
            confirm_part['Sender'] = requestaddr
            confirm_part['From'] = requestaddr
            confirm_part['Date'] = email.utils.formatdate(localtime=True)
            confirm_part['Message-ID'] = Utils.unique_message_id(mlist)
            admin_notice.attach(auth_part)
            admin_notice.attach(message.MIMEMessage(msg))
            admin_notice.attach(message.MIMEMessage(confirm_part))
            admin_notice.send(mlist, tomoderators=1)
        finally:
            i18n.set_translation(saved_translation)
    # Log the held message
    syslog('vette', '%s post from %s held, message-id=%s: %s', listname,
           sender, message_id, reason)
    # Leave the delivery pipeline by raising the specific hold exception.
    raise exc
Beispiel #31
0
def save_attachment(mlist, msg, dir, filter_html=True, patches=None, sigs=None):
    """Store the payload of `msg` in the list archive and return its URL.

    The file is written below os.path.join(mlist.archive_dir(), dir) under a
    sanitized name that is made unique with a "-NNNN" counter when needed.
    text/html payloads are piped through mm_cfg.ARCHIVE_HTML_SANITIZER when
    `filter_html` is true, and message/rfc822 payloads are stored as
    HTML-escaped text.

    Arguments:
      mlist       -- the mailing list owning the archive.
      msg         -- the message part to save.
      dir         -- attachment directory, relative to the archive
                     directory; also used to build the URL.
      filter_html -- run text/html payloads through the HTML sanitizer.
      patches     -- optional list; a dict describing the attachment is
                     appended to it unless the part is text/html,
                     message/rfc822 or was recorded in `sigs`.
      sigs        -- optional list; a dict describing the attachment is
                     appended to it when the part is a signature (extension
                     ends in ".sig" or type application/pgp-signature).

    Returns the URL of the saved attachment, with "?sha1=<digest>" appended
    when a digest is available.
    """
    fsdir = os.path.join(mlist.archive_dir(), dir)
    makedirs(fsdir)
    # Figure out the attachment type and get the decoded data
    decodedpayload = msg.get_payload(decode=True)
    # BAW: mimetypes ought to handle non-standard, but commonly found types,
    # e.g. image/jpg (should be image/jpeg).  For now we just store such
    # things as application/octet-streams since that seems the safest.
    ctype = msg.get_content_type()
    # i18n file name is encoded
    lcset = Utils.GetCharSet(mlist.preferred_language)
    filename = Utils.oneline(msg.get_filename(""), lcset)
    # Split at the FIRST dot: os.path.splitext() won't work with double
    # extensions like .patch.sig
    fnext = None
    try:
        (filename, fnext) = filename.split(".", 1)
    except (AttributeError, TypeError, ValueError):
        # No dot in the name (or no usable name at all): leave the name as
        # it is, without an extension.
        pass
    else:
        if filename == "":
            filename = None
        if fnext != "":
            fnext = "." + fnext
    # For safety, we should confirm this is valid ext for content-type
    # but we can use fnext if we introduce fnext filtering
    if mm_cfg.SCRUBBER_USE_ATTACHMENT_FILENAME_EXTENSION:
        # HTML message doesn't have filename :-(
        # if it's text/plain, use '.txt', otherwise we'd end with '.ksh' or so
        ext = fnext or guess_extension(ctype, ".txt")
    else:
        ext = guess_extension(ctype, fnext)
    if not ext:
        # We don't know what it is, so assume it's just a shapeless
        # application/octet-stream, unless the Content-Type: is
        # message/rfc822, in which case we know we'll coerce the type to
        # text/plain below.
        if ctype == "message/rfc822":
            ext = ".txt"
        else:
            ext = ".bin"
    # Allow only alphanumerics, dash, underscore, and dot
    ext = sre.sub("", ext)
    path = None
    extra = ""
    sha = ""
    msgfrom = ""
    do_write_file = True
    if mm_cfg.SCRUBBER_ADD_PAYLOAD_HASH_FILENAME or not filename:
        sha = msg.get(mm_cfg.SCRUBBER_SHA1SUM_HEADER)
        if sha:
            # no need to clutter headers
            del msg[mm_cfg.SCRUBBER_SHA1SUM_HEADER]
            msgfrom = msg[mm_cfg.SCRUBBER_SIGNEDBY_HEADER]
        else:
            # NOTE(review): get_payload(decode=True) returns None for
            # multipart payloads such as message/rfc822, in which case this
            # hash call presumably fails -- confirm and decide what should
            # be hashed for embedded messages.
            sha = sha_new(decodedpayload).hexdigest()
    # We need a lock to calculate the next attachment number
    lockfile = os.path.join(fsdir, "attachments.lock")
    lock = LockFile.LockFile(lockfile)
    lock.lock()
    try:
        # Now base the filename on what's in the attachment, uniquifying it if
        # necessary.
        if not filename or mm_cfg.SCRUBBER_DONT_USE_ATTACHMENT_FILENAME:
            filebase = "attachment"
        else:
            # Sanitize the filename given in the message headers
            parts = pre.split(filename)
            filename = parts[-1]
            # Strip off leading dots
            filename = dre.sub("", filename)
            # Allow only alphanumerics, dash, underscore, and dot
            filename = sre.sub("", filename)
            # If the filename's extension doesn't match the type we guessed,
            # which one should we go with?  For now, let's go with the one we
            # guessed so attachments can't lie about their type.  Also, if the
            # filename /has/ no extension, then tack on the one we guessed.
            # The extension was removed from the name above.
            filebase = filename

        # Now we're looking for a unique name for this file on the file
        # system.  If msgdir/filebase.ext isn't unique, we'll add a counter
        # after filebase, e.g. msgdir/filebase-cnt.ext
        counter = 0
        while True:
            path = os.path.join(fsdir, filebase + extra + ext)
            # Generally it is not a good idea to test for file existence
            # before just trying to create it, but the alternatives aren't
            # wonderful (i.e. os.open(..., O_CREAT | O_EXCL) isn't
            # NFS-safe).  Besides, we have an exclusive lock now, so we're
            # guaranteed that no other process will be racing with us.
            if os.path.exists(path):
                counter += 1
                extra = "-%04d" % counter
            else:
                break
        filename = filebase + extra + ext

        if mm_cfg.SCRUBBER_ADD_PAYLOAD_HASH_FILENAME:
            # Make content hash to attachment
            linkdir = os.path.join(fsdir, "..", "links")
            makedirs(linkdir)
            if msgfrom:
                dst = os.path.join(linkdir, msgfrom + "_" + sha)
            else:
                dst = os.path.join(linkdir, sha)
            src = os.path.join(fsdir, filename)
            try:
                os.symlink(src, dst)
            except OSError:
                # The link could not be created, typically because one with
                # the same hash already exists.  The attachment itself is
                # still written below.
                syslog("gpg", "Duplicate attachment: %s/%s msgfrom: %s sha1: %s" % (fsdir, filename, msgfrom, sha))
                # To deduplicate would need to parse, etc.
                # filename = os.readlink(dst)
                # do_write_file = False

    finally:
        lock.unlock()
    if do_write_file:
        # `path' now contains the unique filename for the attachment.  There's
        # just one more step we need to do.  If the part is text/html and
        # ARCHIVE_HTML_SANITIZER is a string (which it must be or we wouldn't be
        # here), then send the attachment through the filter program for
        # sanitization
        if filter_html and ctype == "text/html":
            base, ext = os.path.splitext(path)
            tmppath = base + "-tmp" + ext
            fp = open(tmppath, "w")
            try:
                # `with` closes the temporary file even when write() fails.
                with fp:
                    fp.write(decodedpayload)
                cmd = mm_cfg.ARCHIVE_HTML_SANITIZER % {"filename": tmppath}
                progfp = os.popen(cmd, "r")
                decodedpayload = progfp.read()
                status = progfp.close()
                if status:
                    syslog("error", "HTML sanitizer exited with non-zero status: %s", status)
            finally:
                os.unlink(tmppath)
            # BAW: Since we've now sanitized the document, it should be plain
            # text.  Blarg, we really want the sanitizer to tell us what the
            # type of the return data is. :(
            # NOTE(review): `filename` (used for the symlink above and for
            # the sigs/patches entries below) keeps the pre-sanitizing
            # extension -- confirm that this is intended.
            ext = ".txt"
            path = base + ".txt"
        # Is it a message/rfc822 attachment?
        elif ctype == "message/rfc822":
            submsg = msg.get_payload()
            # BAW: I'm sure we can eventually do better than this. :(
            decodedpayload = Utils.websafe(str(submsg))
        with open(path, "w") as fp:
            fp.write(decodedpayload)
    # Now calculate the url
    baseurl = mlist.GetBaseArchiveURL()
    # Private archives will likely have a trailing slash.  Normalize.
    # endswith() also copes with an empty base URL.
    if not baseurl.endswith("/"):
        baseurl += "/"
    url = baseurl + "%s/%s%s%s" % (dir, filebase, extra, ext)
    if sha:
        url += "?sha1=" + sha

    if sigs is not None and (ext.endswith(".sig") or ctype == "application/pgp-signature"):
        sigs.append({"id": sha, "name": filebase, "file": os.path.join(dir, filename), "url": url})
    elif patches is not None and ctype != "text/html" and ctype != "message/rfc822":
        patches.append({"id": sha, "name": filebase, "file": os.path.join(dir, filename), "url": url})

    # A trailing space in url string may save users who are using
    # RFC-1738 compliant MUA (Not Mozilla).
    # Trailing space will definitely be a problem with format=flowed.
    # Bracket the URL instead.
    # '<' + url + '>' Done by caller instead.
    return url
Beispiel #32
0
def _discard_held_request(mlist, request_id):
    """Discard the held request `request_id` on `mlist`.

    Called when the held message file can no longer be read.  A
    LostHeldMessage error raised while discarding is deliberately ignored.
    """
    # BAW: kludge to remove id from requests.db.
    try:
        mlist.HandleRequest(request_id, mm_cfg.DISCARD)
    except Errors.LostHeldMessage:
        pass


def _body_excerpt(msg, limit):
    """Return the decoded body text of `msg`, cut off near `limit` characters.

    Every part yielded by msg.walk() is visited.  Parts without a payload of
    their own (get_payload(decode=True) returns None, e.g. multipart
    containers) are skipped.  A bytes payload is decoded with the part's own
    charset, falling back to us-ascii; undecodable bytes are replaced rather
    than raising.

    `limit` is a character count.  Collection stops after the first line that
    brings the total to `limit` or more; that line is kept whole so a
    character is never split.  A `limit` of zero or less disables the cutoff.
    """
    lines = []
    chars = 0
    for part in msg.walk():
        payload = part.get_payload(decode=True)
        if isinstance(payload, bytes):
            # Watchout for charset= with no value.
            charset = part.get_content_charset() or 'us-ascii'
            try:
                payload = payload.decode(charset, 'replace')
            except (LookupError, UnicodeError, ValueError):
                # Unknown or malformed charset name: still show something.
                payload = payload.decode('us-ascii', 'replace')
        if not isinstance(payload, str):
            continue
        for line in payload.splitlines(True):
            lines.append(line)
            chars += len(line)
            if chars >= limit > 0:
                return ''.join(lines)
    return ''.join(lines)


def show_post_requests(mlist, id, info, total, count, form):
    """Append the moderation section for one held posting to `form`.

    Arguments:
        mlist -- the mailing list; its HandleRequest(), preferred_language
                 and GetOwnerEmail() are used here.
        id    -- numeric id of the held request; used in the form field names.
        info  -- 6-tuple (ptime, sender, subject, reason, filename, msgdata)
                 describing the held posting.
        total -- number of held postings; used in the section heading.
        count -- position of this posting; used in the section heading.
        form  -- object whose AddItem() receives the generated items.

    If the held message file is missing or cannot be parsed, a short notice
    is added to `form`, the request is discarded, and nothing else is shown.
    Any IOError other than ENOENT while reading the file is re-raised.

    NOTE(review): the `%(name)d` placeholders passed to `_()` are never
    interpolated explicitly, so `_()` presumably fills them from this
    function's local variables.  Keep `id`, `count` and `total` as local
    names and keep the `_()` calls in this frame.
    """
    # Mailman.ListAdmin.__handlepost no longer tests for pre 2.0beta3
    ptime, sender, subject, reason, filename, msgdata = info
    form.AddItem('<hr>')
    # Header shown on each held posting (including count of total)
    msg = _('Posting Held for Approval')
    if total != 1:
        msg += _(' (%(count)d of %(total)d)')
    form.AddItem(Center(Header(2, msg)))
    # We need to get the headers and part of the textual body of the message
    # being held.  The best way to do this is to use the email.parser to get
    # an actual object, which will be easier to deal with.  We probably could
    # just do raw reads on the file.
    try:
        msg = readMessage(os.path.join(mm_cfg.DATA_DIR, filename))
    except IOError as e:
        if e.errno != errno.ENOENT:
            raise
        form.AddItem(_('<em>Message with id #%(id)d was lost.'))
        form.AddItem('<p>')
        _discard_held_request(mlist, id)
        return
    except email.errors.MessageParseError:
        form.AddItem(_('<em>Message with id #%(id)d is corrupted.'))
        # BAW: Should we really delete this, or shuttle it off for site admin
        # to look more closely at?
        form.AddItem('<p>')
        _discard_held_request(mlist, id)
        return
    # Get the header text and the message body excerpt.
    # A value of zero or less means, include the entire message regardless
    # of size.
    limit = mm_cfg.ADMINDB_PAGE_TEXT_LIMIT
    # The excerpt is built by walking the parts ourselves:
    # email.iterators.body_line_iterator() only yields str payloads, while
    # get_payload(decode=True) returns bytes, so it would yield nothing.
    # Each part is decoded with its own charset and invalid bytes are
    # replaced, so `body` is always text and never raises later on.
    body = _body_excerpt(msg, limit)
    lcset = Utils.GetCharSet(mlist.preferred_language)
    hdrtxt = NL.join('%s: %s' % (k, v) for k, v in msg.items())
    hdrtxt = Utils.websafe(hdrtxt)
    # Okay, we've reconstituted the message just fine.  Now for the fun part!
    t = Table(cellspacing=0, cellpadding=0, width='100%')
    t.AddRow([Bold(_('From:')), sender])
    row, col = t.GetCurrentRowIndex(), t.GetCurrentCellIndex()
    t.AddCellInfo(row, col-1, align='right')
    t.AddRow([Bold(_('Subject:')),
              Utils.websafe(Utils.oneline(subject, lcset))])
    t.AddCellInfo(row+1, col-1, align='right')
    t.AddRow([Bold(_('Reason:')), _(reason)])
    t.AddCellInfo(row+2, col-1, align='right')
    # The "Received:" row is only shown when the hold time was recorded.
    when = msgdata.get('received_time')
    if when:
        t.AddRow([Bold(_('Received:')), time.ctime(when)])
        t.AddCellInfo(row+3, col-1, align='right')
    # One radio button per moderation action; "Defer" is preselected.
    buttons = hacky_radio_buttons(id,
                (_('Defer'), _('Approve'), _('Reject'), _('Discard')),
                (mm_cfg.DEFER, mm_cfg.APPROVE, mm_cfg.REJECT, mm_cfg.DISCARD),
                (1, 0, 0, 0),
                spacing=5)
    t.AddRow([Bold(_('Action:')), buttons])
    t.AddCellInfo(t.GetCurrentRowIndex(), col-1, align='right')
    t.AddRow(['&nbsp;',
              '<label>' +
              CheckBox('preserve-%d' % id, 'on', 0).Format() +
              '&nbsp;' + _('Preserve message for site administrator') +
              '</label>'
              ])
    t.AddRow(['&nbsp;',
              '<label>' +
              CheckBox('forward-%d' % id, 'on', 0).Format() +
              '&nbsp;' + _('Additionally, forward this message to: ') +
              '</label>' +
              TextBox('forward-addr-%d' % id, size=47,
                      value=mlist.GetOwnerEmail()).Format()
              ])
    notice = msgdata.get('rejection_notice', _('[No explanation given]'))
    t.AddRow([
        Bold(_('If you reject this post,<br>please explain (optional):')),
        TextArea('comment-%d' % id, rows=4, cols=EXCERPT_WIDTH,
                 text=Utils.wrap(_(notice), column=80))
        ])
    row, col = t.GetCurrentRowIndex(), t.GetCurrentCellIndex()
    t.AddCellInfo(row, col-1, align='right')
    t.AddRow([Bold(_('Message Headers:')),
              TextArea('headers-%d' % id, hdrtxt,
                       rows=EXCERPT_HEIGHT, cols=EXCERPT_WIDTH, readonly=1)])
    row, col = t.GetCurrentRowIndex(), t.GetCurrentCellIndex()
    t.AddCellInfo(row, col-1, align='right')
    t.AddRow([Bold(_('Message Excerpt:')),
              TextArea('fulltext-%d' % id, Utils.websafe(body),
                       rows=EXCERPT_HEIGHT, cols=EXCERPT_WIDTH, readonly=1)])
    t.AddCellInfo(row+1, col-1, align='right')
    form.AddItem(t)
    form.AddItem('<p>')