예제 #1
0
def poll_newsgroup(mlist, conn, first, last, glock):
    """Gate articles from a linked newsgroup into the list's incoming queue.

    Each article number in ``range(first, last)`` is fetched from the NNTP
    connection.  Articles that do not carry an ``X-BeenThere`` header naming
    the list's posting address are parsed, re-addressed to the list and
    enqueued with ``fromusenet=True``.

    :param mlist: The mailing list being gated.  Its ``usenet_watermark`` is
        set to each article number that was handled.
    :param conn: The NNTP connection object; ``head()`` and ``body()`` are
        called on it and element 3 of each response is used as the list of
        lines.
    :param first: The first article number to poll (inclusive).
    :param last: The article number at which to stop (exclusive).
    :param glock: A lock object whose ``refresh()`` is called once per
        article.
    """
    listname = mlist.internal_name()
    # NEWNEWS is not portable and has synchronization issues.
    for num in range(first, last):
        glock.refresh()
        try:
            headers = conn.head(repr(num))[3]
            found_to = False
            beenthere = False
            for header in headers:
                i = header.find(':')
                value = header[:i].lower()
                if i > 0 and value == 'to':
                    found_to = True
                # FIXME 2010-02-16 barry use List-Post header.
                # `!=` rather than the Python 2-only `<>` operator, which is
                # a syntax error on Python 3.
                if value != 'x-beenthere':
                    continue
                if header[i:] == ': %s' % mlist.posting_address:
                    beenthere = True
                    break
            if not beenthere:
                body = conn.body(repr(num))[3]
                # Usenet originated messages will not have a Unix envelope
                # (i.e. "From " header).  This breaks Pipermail archiving, so
                # we will synthesize one.  Be sure to use the format searched
                # for by mailbox.UnixMailbox._isrealfromline().  BAW: We use
                # the -bounces address here in case any downstream clients use
                # the envelope sender for bounces; I'm not sure about this,
                # but it's the closest to the old semantics.
                lines = [
                    'From %s  %s' %
                    (mlist.GetBouncesEmail(), time.ctime(time.time()))
                ]
                lines.extend(headers)
                lines.append('')
                lines.extend(body)
                lines.append('')
                p = Parser(Message.Message)
                try:
                    msg = p.parsestr(NL.join(lines))
                except email.Errors.MessageError as e:
                    log.error('email package exception for %s:%d\n%s',
                              mlist.linked_newsgroup, num, e)
                    # Skip this article without moving the watermark.
                    raise _ContinueLoop
                if found_to:
                    # Preserve the original recipient before re-addressing
                    # the message to the list.
                    del msg['X-Originally-To']
                    msg['X-Originally-To'] = msg['To']
                    del msg['To']
                msg['To'] = mlist.posting_address
                # Post the message to the locked list
                inq = Switchboard(config.INQUEUE_DIR)
                inq.enqueue(msg, listid=mlist.list_id, fromusenet=True)
                log.info('posted to list %s: %7d', listname, num)
        except nntplib.NNTPError:
            # Log the failure (with traceback) and fall through so that the
            # watermark still advances past this article.
            log.exception('NNTP error for list %s: %7d', listname, num)
        except _ContinueLoop:
            continue
        # Even if we don't post the message because it was seen on the
        # list already, update the watermark
        mlist.usenet_watermark = num
예제 #2
0
def poll_newsgroup(mlist, conn, first, last, glock):
    """Gate articles from a linked newsgroup into the list's incoming queue.

    Each article number in ``range(first, last)`` is fetched from the NNTP
    connection.  Articles that do not carry an ``X-BeenThere`` header naming
    the list's posting address are parsed, re-addressed to the list and
    enqueued with ``fromusenet=True``.

    :param mlist: The mailing list being gated.  Its ``usenet_watermark`` is
        set to each article number that was handled.
    :param conn: The NNTP connection object; ``head()`` and ``body()`` are
        called on it and element 3 of each response is used as the list of
        lines.
    :param first: The first article number to poll (inclusive).
    :param last: The article number at which to stop (exclusive).
    :param glock: A lock object whose ``refresh()`` is called once per
        article.
    """
    listname = mlist.internal_name()
    # NEWNEWS is not portable and has synchronization issues.
    for num in range(first, last):
        glock.refresh()
        try:
            headers = conn.head(repr(num))[3]
            found_to = False
            beenthere = False
            for header in headers:
                i = header.find(':')
                value = header[:i].lower()
                if i > 0 and value == 'to':
                    found_to = True
                # FIXME 2010-02-16 barry use List-Post header.
                # `!=` rather than the Python 2-only `<>` operator, which is
                # a syntax error on Python 3.
                if value != 'x-beenthere':
                    continue
                if header[i:] == ': %s' % mlist.posting_address:
                    beenthere = True
                    break
            if not beenthere:
                body = conn.body(repr(num))[3]
                # Usenet originated messages will not have a Unix envelope
                # (i.e. "From " header).  This breaks Pipermail archiving, so
                # we will synthesize one.  Be sure to use the format searched
                # for by mailbox.UnixMailbox._isrealfromline().  BAW: We use
                # the -bounces address here in case any downstream clients use
                # the envelope sender for bounces; I'm not sure about this,
                # but it's the closest to the old semantics.
                lines = ['From %s  %s' % (mlist.GetBouncesEmail(),
                                          time.ctime(time.time()))]
                lines.extend(headers)
                lines.append('')
                lines.extend(body)
                lines.append('')
                p = Parser(Message.Message)
                try:
                    msg = p.parsestr(NL.join(lines))
                except email.Errors.MessageError as e:
                    log.error('email package exception for %s:%d\n%s',
                              mlist.linked_newsgroup, num, e)
                    # Skip this article without moving the watermark.
                    raise _ContinueLoop
                if found_to:
                    # Preserve the original recipient before re-addressing
                    # the message to the list.
                    del msg['X-Originally-To']
                    msg['X-Originally-To'] = msg['To']
                    del msg['To']
                msg['To'] = mlist.posting_address
                # Post the message to the locked list
                inq = Switchboard(config.INQUEUE_DIR)
                inq.enqueue(msg,
                            listid=mlist.list_id,
                            fromusenet=True)
                log.info('posted to list %s: %7d', listname, num)
        except nntplib.NNTPError:
            # Log the failure (with traceback) and fall through so that the
            # watermark still advances past this article.
            log.exception('NNTP error for list %s: %7d', listname, num)
        except _ContinueLoop:
            continue
        # Even if we don't post the message because it was seen on the
        # list already, update the watermark
        mlist.usenet_watermark = num
예제 #3
0
class Runner:
    """Base class for runners that process the files of a switchboard queue.

    Subclasses implement `_dispose()` and may override the other underscore
    hooks (`_do_periodic()`, `_snooze()`, `_short_circuit()`, `_clean_up()`).
    """

    # When false, no queue directory or switchboard is created in __init__().
    is_queue_runner = True

    def __init__(self, name, slice=None):
        """Create a runner.

        :param name: The runner's name.  Its settings are read from the
            `runner.<name>` configuration section.
        :type name: str
        :param slice: The slice number for this runner.  This is passed
            directly to the underlying `ISwitchboard` object.  This is ignored
            for runners that don't manage a queue.
        :type slice: int or None
        """
        # Grab the configuration section.
        self.name = name
        section = getattr(config, 'runner.' + name)
        substitutions = config.paths
        substitutions['name'] = name
        numslices = int(section.instances)
        # Check whether the runner is queue runner or not; non-queue runner
        # should not have queue_directory or switchboard instance.
        if self.is_queue_runner:
            self.queue_directory = expand(section.path, substitutions)
            self.switchboard = Switchboard(
                name, self.queue_directory, slice, numslices, True)
        else:
            self.queue_directory = None
            self.switchboard = None
        self.sleep_time = as_timedelta(section.sleep_time)
        # sleep_time is a timedelta; turn it into a float for time.sleep().
        self.sleep_float = (86400 * self.sleep_time.days +
                            self.sleep_time.seconds +
                            self.sleep_time.microseconds / 1.0e6)
        self.max_restarts = int(section.max_restarts)
        self.start = as_boolean(section.start)
        self._stop = False
        self.status = 0

    def __repr__(self):
        return '<{0} at {1:#x}>'.format(self.__class__.__name__, id(self))

    def signal_handler(self, signum, frame):
        """Handle the signals installed by `set_signals()`.

        SIGTERM, SIGINT and SIGUSR1 stop the runner and record the signal
        number in `self.status`; SIGHUP calls `reopen()` for the logs.

        :param signum: The number of the signal that was received.
        :param frame: The interrupted stack frame (unused).
        """
        # Fall back to the raw number for signals without a known name.
        signame = {
            signal.SIGTERM: 'SIGTERM',
            signal.SIGINT: 'SIGINT',
            signal.SIGUSR1: 'SIGUSR1',
            }.get(signum, signum)
        if signum in (signal.SIGTERM, signal.SIGINT, signal.SIGUSR1):
            self.stop()
            self.status = signum
            rlog.info('%s runner caught %s.  Stopping.', self.name, signame)
        elif signum == signal.SIGHUP:
            reopen()
            rlog.info('%s runner caught SIGHUP.  Reopening logs.', self.name)

    def set_signals(self):
        """See `IRunner`."""
        signal.signal(signal.SIGHUP, self.signal_handler)
        signal.signal(signal.SIGINT, self.signal_handler)
        signal.signal(signal.SIGTERM, self.signal_handler)
        signal.signal(signal.SIGUSR1, self.signal_handler)

    def stop(self):
        """See `IRunner`."""
        self._stop = True

    def run(self):
        """See `IRunner`."""
        # Start the main loop for this runner.
        try:
            while True:
                # Once through the loop that processes all the files in the
                # queue directory.
                filecnt = self._one_iteration()
                # Do the periodic work for the subclass.
                self._do_periodic()
                # If the stop flag is set, we're done.
                if self._stop:
                    break
                # Give the runner an opportunity to snooze for a while, but
                # pass it the file count so it can decide whether to do more
                # work now or not.
                self._snooze(filecnt)
        except KeyboardInterrupt:
            pass
        finally:
            # Always give the subclass a chance to clean up, however the
            # loop ended.
            self._clean_up()

    def _one_iteration(self):
        """See `IRunner`."""
        me = self.__class__.__name__
        dlog.debug('[%s] starting oneloop', me)
        # List all the files in our queue directory.  The switchboard is
        # guaranteed to hand us the files in FIFO order.
        files = self.switchboard.files
        for filebase in files:
            dlog.debug('[%s] processing filebase: %s', me, filebase)
            try:
                # Ask the switchboard for the message and metadata objects
                # associated with this queue file.
                msg, msgdata = self.switchboard.dequeue(filebase)
            except Exception as error:
                # This used to just catch email.Errors.MessageParseError, but
                # other problems can occur in message parsing, e.g.
                # ValueError, and exceptions can occur in unpickling too.  We
                # don't want the runner to die, so we just log and skip this
                # entry, but preserve it for analysis.
                self._log(error)
                elog.error('Skipping and preserving unparseable message: %s',
                           filebase)
                self.switchboard.finish(filebase, preserve=True)
                config.db.abort()
                continue
            try:
                dlog.debug('[%s] processing onefile', me)
                self._process_one_file(msg, msgdata)
                dlog.debug('[%s] finishing filebase: %s', me, filebase)
                self.switchboard.finish(filebase)
            except Exception as error:
                # All runners that implement _dispose() must guarantee that
                # exceptions are caught and dealt with properly.  Still, there
                # may be a bug in the infrastructure, and we do not want those
                # to cause messages to be lost.  Any uncaught exceptions will
                # cause the message to be stored in the shunt queue for human
                # intervention.
                self._log(error)
                # Put a marker in the metadata for unshunting.
                msgdata['whichq'] = self.switchboard.name
                # It is possible that shunting can throw an exception, e.g. a
                # permissions problem or a MemoryError due to a really large
                # message.  Try to be graceful.
                try:
                    shunt = config.switchboards['shunt']
                    new_filebase = shunt.enqueue(msg, msgdata)
                    elog.error('SHUNTING: %s', new_filebase)
                    self.switchboard.finish(filebase)
                except Exception as error:
                    # The message wasn't successfully shunted.  Log the
                    # exception and try to preserve the original queue entry
                    # for possible analysis.
                    self._log(error)
                    elog.error(
                        'SHUNTING FAILED, preserving original entry: %s',
                        filebase)
                    self.switchboard.finish(filebase, preserve=True)
                config.db.abort()
            # Other work we want to do each time through the loop.
            dlog.debug('[%s] doing periodic', me)
            self._do_periodic()
            dlog.debug('[%s] committing transaction', me)
            config.db.commit()
            dlog.debug('[%s] checking short circuit', me)
            if self._short_circuit():
                dlog.debug('[%s] short circuiting', me)
                break
        dlog.debug('[%s] ending oneloop: %s', me, len(files))
        return len(files)

    def _process_one_file(self, msg, msgdata):
        """See `IRunner`."""
        # Do some common sanity checking on the message metadata.  It's got to
        # be destined for a particular mailing list.  This switchboard is used
        # to shunt off badly formatted messages.  We don't want to just trash
        # them because they may be fixable with human intervention.  Just get
        # them out of our sight.
        #
        # Find out which mailing list this message is destined for.
        mlist = None
        missing = object()
        # First try to dig out the target list by id.  If there's no list-id
        # in the metadata, fall back to the fqdn list name for backward
        # compatibility.
        list_manager = getUtility(IListManager)
        list_id = msgdata.get('listid', missing)
        fqdn_listname = None
        if list_id is missing:
            fqdn_listname = msgdata.get('listname', missing)
            # XXX Deprecate.
            if fqdn_listname is not missing:
                mlist = list_manager.get(fqdn_listname)
        else:
            mlist = list_manager.get_by_list_id(list_id)
        if mlist is None:
            # Report whichever identifier the metadata actually carried.  The
            # `missing` sentinel must never reach the log message, so compare
            # against it rather than against None.
            if list_id is not missing:
                identifier = list_id
            elif fqdn_listname is not missing:
                identifier = fqdn_listname
            else:
                identifier = None
            elog.error(
                '%s runner "%s" shunting message for missing list: %s',
                msg['message-id'], self.name, identifier)
            config.switchboards['shunt'].enqueue(msg, msgdata)
            return
        # Now process this message.  We also want to set up the language
        # context for this message.  The context will be the preferred
        # language for the user if the sender is a member of the list, or it
        # will be the list's preferred language.  However, we must take
        # special care to reset the defaults, otherwise subsequent messages
        # may be translated incorrectly.
        #
        # mlist is known to be set here; the missing-list case returned above.
        if msg.sender:
            member = mlist.members.get_member(msg.sender)
            language = (member.preferred_language
                        if member is not None
                        else mlist.preferred_language)
        else:
            language = mlist.preferred_language
        with _.using(language.code):
            msgdata['lang'] = language.code
            try:
                keepqueued = self._dispose(mlist, msg, msgdata)
            except Exception as error:
                # Trigger the Zope event and re-raise
                notify(RunnerCrashEvent(self, mlist, msg, msgdata, error))
                raise
        # A true return value from _dispose() puts the message back on this
        # runner's queue.
        if keepqueued:
            self.switchboard.enqueue(msg, msgdata)

    def _log(self, exc):
        """Log `exc` and the current traceback to the error log."""
        elog.error('Uncaught runner exception: %s', exc)
        s = StringIO()
        traceback.print_exc(file=s)
        elog.error('%s', s.getvalue())

    def _clean_up(self):
        """See `IRunner`."""
        pass

    def _dispose(self, mlist, msg, msgdata):
        """See `IRunner`."""
        raise NotImplementedError

    def _do_periodic(self):
        """See `IRunner`."""
        pass

    def _snooze(self, filecnt):
        """See `IRunner`."""
        # Don't sleep when files were just processed or when no positive
        # sleep time is configured.
        if filecnt or self.sleep_float <= 0:
            return
        time.sleep(self.sleep_float)

    def _short_circuit(self):
        """See `IRunner`."""
        return self._stop
예제 #4
0
class Runner:
    """Base class for runners that process the files of a switchboard queue.

    Subclasses implement `_dispose()` and may override the other underscore
    hooks (`_do_periodic()`, `_snooze()`, `_short_circuit()`, `_clean_up()`).
    """

    # When false, no queue directory or switchboard is created in __init__().
    is_queue_runner = True

    def __init__(self, name, slice=None):
        """Create a runner.

        :param name: The runner's name.  Its settings are read from the
            `runner.<name>` configuration section.
        :type name: str
        :param slice: The slice number for this runner.  This is passed
            directly to the underlying `ISwitchboard` object.  This is ignored
            for runners that don't manage a queue.
        :type slice: int or None
        """
        # Grab the configuration section.
        self.name = name
        section = getattr(config, 'runner.' + name)
        substitutions = config.paths
        substitutions['name'] = name
        numslices = int(section.instances)
        # Check whether the runner is queue runner or not; non-queue runner
        # should not have queue_directory or switchboard instance.
        if self.is_queue_runner:
            self.queue_directory = expand(section.path, None, substitutions)
            self.switchboard = Switchboard(name, self.queue_directory, slice,
                                           numslices, True)
        else:
            self.queue_directory = None
            self.switchboard = None
        self.sleep_time = as_timedelta(section.sleep_time)
        # sleep_time is a timedelta; turn it into a float for time.sleep().
        self.sleep_float = (86400 * self.sleep_time.days +
                            self.sleep_time.seconds +
                            self.sleep_time.microseconds / 1.0e6)
        self.max_restarts = int(section.max_restarts)
        self.start = as_boolean(section.start)
        self._stop = False
        self.status = 0

    def __repr__(self):
        return '<{} at {:#x}>'.format(self.__class__.__name__, id(self))

    def signal_handler(self, signum, frame):  # pragma: nocover
        """Handle the signals installed by `set_signals()`.

        SIGHUP calls `reopen()` for the logs.  SIGTERM, SIGINT and SIGUSR1
        stop the runner, record the signal number in `self.status` and raise
        `RunnerInterrupt`.

        :param signum: The number of the signal that was received.
        :param frame: The interrupted stack frame (unused).
        :raises RunnerInterrupt: For SIGTERM, SIGINT and SIGUSR1.
        """
        # Fall back to the raw number for signals without a known name.
        signame = {
            signal.SIGTERM: 'SIGTERM',
            signal.SIGINT: 'SIGINT',
            signal.SIGUSR1: 'SIGUSR1',
        }.get(signum, signum)
        if signum == signal.SIGHUP:
            reopen()
            rlog.info('{} runner caught SIGHUP.  Reopening logs.'.format(
                self.name))
        elif signum in (signal.SIGTERM, signal.SIGINT, signal.SIGUSR1):
            self.stop()
            self.status = signum
            rlog.info('{} runner caught {}.  Stopping.'.format(
                self.name, signame))
            # As of Python 3.5, PEP 475 gets in our way.  Runners with long
            # time.sleep()'s in their _snooze() method (e.g. the retry runner)
            # will have their system call implemented time.sleep()
            # automatically retried at the C layer.  The only reliable way to
            # prevent this is to raise an exception in the signal handler.  The
            # standard run() method automatically suppresses this exception,
            # meaning, it's caught and ignored, but effectively breaks the
            # run() loop, which is just what we want.  Runners which implement
            # their own run() method must be prepared to catch
            # RunnerInterrupts, usually also ignoring them.
            raise RunnerInterrupt

    def set_signals(self):
        """See `IRunner`."""
        signal.signal(signal.SIGHUP, self.signal_handler)
        signal.signal(signal.SIGINT, self.signal_handler)
        signal.signal(signal.SIGTERM, self.signal_handler)
        signal.signal(signal.SIGUSR1, self.signal_handler)

    def stop(self):
        """See `IRunner`."""
        self._stop = True

    def run(self):
        """See `IRunner`."""
        # Start the main loop for this runner.
        with suppress(KeyboardInterrupt, RunnerInterrupt):
            while True:
                # Once through the loop that processes all the files in the
                # queue directory.
                filecnt = self._one_iteration()
                # Do the periodic work for the subclass.
                self._do_periodic()
                # If the stop flag is set, we're done.
                if self._stop:
                    break
                # Give the runner an opportunity to snooze for a while, but
                # pass it the file count so it can decide whether to do more
                # work now or not.
                self._snooze(filecnt)
        self._clean_up()

    def _one_iteration(self):
        """See `IRunner`."""
        me = self.__class__.__name__
        dlog.debug('[%s] starting oneloop', me)
        # List all the files in our queue directory.  The switchboard is
        # guaranteed to hand us the files in FIFO order.
        files = self.switchboard.files
        for filebase in files:
            dlog.debug('[%s] processing filebase: %s', me, filebase)
            try:
                # Ask the switchboard for the message and metadata objects
                # associated with this queue file.
                msg, msgdata = self.switchboard.dequeue(filebase)
            except Exception as error:
                # This used to just catch email.Errors.MessageParseError, but
                # other problems can occur in message parsing, e.g.
                # ValueError, and exceptions can occur in unpickling too.  We
                # don't want the runner to die, so we just log and skip this
                # entry, but preserve it for analysis.
                self._log(error)
                elog.error('Skipping and preserving unparseable message: %s',
                           filebase)
                self.switchboard.finish(filebase, preserve=True)
                config.db.abort()
                continue
            try:
                dlog.debug('[%s] processing onefile', me)
                self._process_one_file(msg, msgdata)
                dlog.debug('[%s] finishing filebase: %s', me, filebase)
                self.switchboard.finish(filebase)
            except Exception as error:
                # All runners that implement _dispose() must guarantee that
                # exceptions are caught and dealt with properly.  Still, there
                # may be a bug in the infrastructure, and we do not want those
                # to cause messages to be lost.  Any uncaught exceptions will
                # cause the message to be stored in the shunt queue for human
                # intervention.
                self._log(error)
                # Put a marker in the metadata for unshunting.
                msgdata['whichq'] = self.switchboard.name
                # It is possible that shunting can throw an exception, e.g. a
                # permissions problem or a MemoryError due to a really large
                # message.  Try to be graceful.
                try:
                    shunt = config.switchboards['shunt']
                    new_filebase = shunt.enqueue(msg, msgdata)
                    elog.error('SHUNTING: %s', new_filebase)
                    self.switchboard.finish(filebase)
                except Exception as error:
                    # The message wasn't successfully shunted.  Log the
                    # exception and try to preserve the original queue entry
                    # for possible analysis.
                    self._log(error)
                    elog.error(
                        'SHUNTING FAILED, preserving original entry: %s',
                        filebase)
                    self.switchboard.finish(filebase, preserve=True)
                config.db.abort()
            # Other work we want to do each time through the loop.
            dlog.debug('[%s] doing periodic', me)
            self._do_periodic()
            dlog.debug('[%s] committing transaction', me)
            config.db.commit()
            dlog.debug('[%s] checking short circuit', me)
            if self._short_circuit():
                dlog.debug('[%s] short circuiting', me)
                break
        dlog.debug('[%s] ending oneloop: %s', me, len(files))
        return len(files)

    def _process_one_file(self, msg, msgdata):
        """See `IRunner`."""
        # Do some common sanity checking on the message metadata.  It's got to
        # be destined for a particular mailing list.  This switchboard is used
        # to shunt off badly formatted messages.  We don't want to just trash
        # them because they may be fixable with human intervention.  Just get
        # them out of our sight.
        #
        # Find out which mailing list this message is destined for.
        mlist = None
        missing = object()
        # First try to dig out the target list by id.  If there's no list-id
        # in the metadata, fall back to the fqdn list name for backward
        # compatibility.
        list_manager = getUtility(IListManager)
        list_id = msgdata.get('listid', missing)
        fqdn_listname = None
        if list_id is missing:
            fqdn_listname = msgdata.get('listname', missing)
            # XXX Deprecate.
            if fqdn_listname is not missing:
                mlist = list_manager.get(fqdn_listname)
        else:
            mlist = list_manager.get_by_list_id(list_id)
        if mlist is None:
            # Report whichever identifier the metadata actually carried.  The
            # `missing` sentinel must never reach the log message, so compare
            # against it rather than against None.
            if list_id is not missing:
                identifier = list_id
            elif fqdn_listname is not missing:
                identifier = fqdn_listname
            else:
                identifier = None
            elog.error('%s runner "%s" shunting message for missing list: %s',
                       msg['message-id'], self.name, identifier)
            config.switchboards['shunt'].enqueue(msg, msgdata)
            return
        # Now process this message.  We also want to set up the language
        # context for this message.  The context will be the preferred
        # language for the user if the sender is a member of the list, or it
        # will be the list's preferred language.  However, we must take
        # special care to reset the defaults, otherwise subsequent messages
        # may be translated incorrectly.
        #
        # mlist is known to be set here; the missing-list case returned above.
        if msg.sender:
            member = mlist.members.get_member(msg.sender)
            language = (member.preferred_language
                        if member is not None else mlist.preferred_language)
        else:
            language = mlist.preferred_language
        with _.using(language.code):
            msgdata['lang'] = language.code
            try:
                keepqueued = self._dispose(mlist, msg, msgdata)
            except Exception as error:
                # Trigger the Zope event and re-raise
                notify(RunnerCrashEvent(self, mlist, msg, msgdata, error))
                raise
        # A true return value from _dispose() puts the message back on this
        # runner's queue.
        if keepqueued:
            self.switchboard.enqueue(msg, msgdata)

    def _log(self, exc):
        """Log `exc` and the current traceback to the error log."""
        elog.error('Uncaught runner exception: %s', exc)
        s = StringIO()
        traceback.print_exc(file=s)
        elog.error('%s', s.getvalue())

    def _clean_up(self):
        """See `IRunner`."""
        pass

    def _dispose(self, mlist, msg, msgdata):
        """See `IRunner`."""
        raise NotImplementedError

    def _do_periodic(self):
        """See `IRunner`."""
        pass

    def _snooze(self, filecnt):
        """See `IRunner`."""
        # Don't sleep when files were just processed or when no positive
        # sleep time is configured.
        if filecnt or self.sleep_float <= 0:
            return
        time.sleep(self.sleep_float)

    def _short_circuit(self):
        """See `IRunner`."""
        return self._stop
예제 #5
0
class Archiver(object):
    """Archiver that sends messages to HyperKitty over HTTP.

    Messages that cannot be sent are stored in a spool switchboard and
    retried the next time a message is archived.
    """

    name = "hyperkitty"

    def __init__(self):
        """Load the configuration and set up the retry spool."""
        self._conf = {}
        self._load_conf()
        queue_directory = os.path.join(config.ARCHIVE_DIR, self.name, 'spool')
        self._switchboard = Switchboard(self.name,
                                        queue_directory,
                                        recover=False)

    @property
    def base_url(self):
        """The configured HyperKitty base URL (always ends with a slash)."""
        return self._conf["base_url"]

    @property
    def api_key(self):
        """The configured API key sent with every request."""
        return self._conf["api_key"]

    def _load_conf(self):
        """
        Find the location of the HyperKitty-specific config file from Mailman's
        main config file and load the values.
        """
        # Read our specific configuration file
        archiver_config = external_configuration(
            config.archiver.hyperkitty.configuration)
        for option in ("base_url", ):
            url = archiver_config.get("general", option)
            # Normalize to a trailing slash so urljoin() appends to the path.
            if not url.endswith("/"):
                url += "/"
            self._conf[option] = url
        self._conf["api_key"] = archiver_config.get("general", "api_key")

    def _get_url(self, mlist, params):
        """Ask HyperKitty for an archive url.

        :param mlist: The IMailingList object (unused here).
        :param params: The query parameters identifying the wanted url.  The
            dictionary is not modified.
        :returns: The url string, or an empty string when HyperKitty does not
            answer with status 200 or with valid JSON.
        """
        # Work on a copy so the caller's dictionary is left untouched.
        query = dict(params)
        query["key"] = self.api_key
        url = urljoin(self.base_url, "api/mailman/urls")
        result = requests.get(url, params=query)
        if result.status_code != 200:
            logger.error("HyperKitty failure on %s: %s (%s)", url, result.text,
                         result.status_code)
            return ""
        try:
            result = result.json()
        except ValueError as e:
            logger.exception("Invalid response from HyperKitty on %s: %s", url,
                             e)
            return ""
        return result["url"]

    def list_url(self, mlist):
        """Return the url to the top of the list's archive.

        :param mlist: The IMailingList object.
        :returns: The url string, or an empty string if HyperKitty could not
            provide it.
        """
        return self._get_url(mlist, {"mlist": mlist.fqdn_listname})

    def permalink(self, mlist, msg):
        """Return the url to the message in the archive.

        This url points directly to the message in the archive.  This method
        only calculates the url, it does not actually archive the message.

        :param mlist: The IMailingList object.
        :param msg: The message object.
        :returns: The url string, or an empty string if HyperKitty could not
            provide it.
        """
        msg_id = msg['Message-Id'].strip().strip("<>")
        return self._get_url(mlist, {
            "mlist": mlist.fqdn_listname,
            "msgid": msg_id
        })

    def archive_message(self, mlist, msg):
        """
        Send the message to the archiver, but process the queue first if it
        contains any held messages.

        :param mlist: The IMailingList object.
        :param msg: The message object.
        :returns: The url string or None if the message's archive url cannot
            be calculated.
        """
        self.process_queue()
        return self._archive_message(mlist, msg)

    def _archive_message(self, mlist, msg, from_filebase=None):
        """Send the message to the archiver. If an exception occurs, queue the
        message for later retry.

        :param mlist: The IMailingList object.
        :param msg: The message object.
        :param from_filebase: If the message already comes from the retry
            queue, set the queue filebase here and it will be properly removed
            on success, or stored for analysis on error.
        :returns: The url string or None if the message's archive url cannot
            be calculated.
        """
        try:
            url = self._send_message(mlist, msg)
            if from_filebase is not None:
                self._switchboard.finish(from_filebase)
            return url
        except Exception as error:
            # Archiving failed, send the message to the queue.
            _log_error(error)
            # Enqueuing can throw an exception, e.g. a permissions problem
            # or a MemoryError due to a really large message.  Try to be
            # graceful.
            try:
                self._switchboard.enqueue(msg, mlist=mlist)
                if from_filebase is not None:
                    self._switchboard.finish(from_filebase)
            except Exception as error:
                # The message wasn't successfully enqueued.
                _log_error(error)
                logger.error(
                    'queuing failed on mailing-list %s for message %s',
                    mlist.list_id, msg['Message-Id'].strip())
                if from_filebase is not None:
                    # Try to preserve the original queue entry for possible
                    # analysis.
                    self._switchboard.finish(from_filebase, preserve=True)

    def _send_message(self, mlist, msg):
        """Send the message to the archiver over HTTP.

        :param mlist: The IMailingList object.
        :param msg: The message object.
        :returns: The url string or None if the message's archive url cannot
            be calculated.
        :raises requests.exceptions.RequestException: If the HTTP request
            fails.
        :raises ValueError: If HyperKitty does not answer with status 200 or
            with valid JSON.
        """
        logger.debug('%s archiver: sending message %s', self.name,
                     msg['Message-Id'].strip())
        url = urljoin(self.base_url, "api/mailman/archive")
        try:
            message_text = msg.as_string()
        except (MessageError, KeyError) as error:
            logger.error('Could not render the message with id %s to text: %s',
                         msg['Message-Id'].strip(), error)
            return  # permanent error, don't raise
        try:
            result = requests.post(
                url,
                params={"key": self.api_key},
                data={"mlist": mlist.fqdn_listname},
                files={"message": ("message.txt", message_text)})
        except requests.exceptions.RequestException as error:
            logger.error('Connection to HyperKitty failed: %s', error)
            raise
        if result.status_code != 200:
            logger.error("HyperKitty failure on %s: %s (%s)", url, result.text,
                         result.status_code)
            raise ValueError(result.text)
        try:
            result = result.json()
        except ValueError as e:
            logger.exception("Invalid response from HyperKitty on %s: %s", url,
                             e)
            raise
        archived_url = result["url"]
        logger.info("HyperKitty archived message %s to %s",
                    msg['Message-Id'].strip(), archived_url)
        return archived_url

    def process_queue(self):
        """Go through the queue of held messages to archive and send them to
        HyperKitty.
        If the archiving is successful, remove them from the queue, otherwise
        re-enqueue them.
        """
        # This must be a call: a bare attribute reference does nothing, and
        # the switchboard is created with recover=False.
        self._switchboard.recover_backup_files()
        files = self._switchboard.files
        for filebase in files:
            logger.debug('HyperKitty archiver processing queued filebase: %s',
                         filebase)
            try:
                # Ask the switchboard for the message and metadata objects
                # associated with this queue file.
                msg, msgdata = self._switchboard.dequeue(filebase)
            except Exception as error:
                # We don't want the process to die here or no further email can
                # be archived, so we just log and skip the entry, but preserve
                # it for analysis.
                _log_error(error)
                logger.error('Skipping and preserving unparseable message: %s',
                             filebase)
                self._switchboard.finish(filebase, preserve=True)
                continue
            mlist = msgdata["mlist"]
            self._archive_message(mlist, msg, from_filebase=filebase)
예제 #6
0
class Runner:
    intercept_signals = True

    def __init__(self, name, slice=None):
        """Create a runner.

        Everything is read from the ``runner.<name>`` section of the global
        configuration: the queue directory, the number of instances, the
        sleep time, the restart limit and the start flag.

        :param name: The runner's name.  It selects the configuration
            section, is substituted into the queue path, and is passed to
            the switchboard.
        :param slice: The slice number for this runner.  This is passed
            directly to the underlying `ISwitchboard` object.  This is ignored
            for runners that don't manage a queue.
        :type slice: int or None
        """
        self.name = name
        # Look up this runner's own configuration section.
        settings = getattr(config, "runner." + name)
        # The queue path may refer to the runner name, so add it to the
        # path substitutions before expanding.
        path_vars = config.paths
        path_vars["name"] = name
        self.queue_directory = expand(settings.path, path_vars)
        slice_count = int(settings.instances)
        self.switchboard = Switchboard(
            name, self.queue_directory, slice, slice_count, True)
        self.sleep_time = as_timedelta(settings.sleep_time)
        # time.sleep() wants seconds as a float, so flatten the timedelta.
        delay = self.sleep_time
        whole_seconds = 86400 * delay.days + delay.seconds
        self.sleep_float = whole_seconds + delay.microseconds / 1.0e6
        self.max_restarts = int(settings.max_restarts)
        self.start = as_boolean(settings.start)
        # Set by stop(); checked by run() and _short_circuit().
        self._stop = False

    def __repr__(self):
        return "<{0} at {1:#x}>".format(self.__class__.__name__, id(self))

    def stop(self):
        """See `IRunner`."""
        self._stop = True

    def run(self):
        """See `IRunner`."""
        # Start the main loop for this runner.
        try:
            while True:
                # Once through the loop that processes all the files in the
                # queue directory.
                filecnt = self._one_iteration()
                # Do the periodic work for the subclass.
                self._do_periodic()
                # If the stop flag is set, we're done.
                if self._stop:
                    break
                # Give the runner an opportunity to snooze for a while, but
                # pass it the file count so it can decide whether to do more
                # work now or not.
                self._snooze(filecnt)
        except KeyboardInterrupt:
            pass
        finally:
            self._clean_up()

    def _one_iteration(self):
        """See `IRunner`.

        Make one pass over the files the switchboard currently lists.  Each
        file is dequeued and given to `_process_one_file()`; afterwards the
        periodic hook runs and the database is committed.  The pass ends
        early when `_short_circuit()` returns true.

        Failures are contained per file: an entry that cannot be dequeued is
        preserved and skipped, and an entry whose processing raises is moved
        to the shunt queue (or preserved if shunting fails as well).

        :return: The number of files that were listed at the start of the
            pass, even if the pass was cut short.
        """
        me = self.__class__.__name__
        dlog.debug("[%s] starting oneloop", me)
        # List all the files in our queue directory.  The switchboard is
        # guaranteed to hand us the files in FIFO order.
        files = self.switchboard.files
        for filebase in files:
            dlog.debug("[%s] processing filebase: %s", me, filebase)
            try:
                # Ask the switchboard for the message and metadata objects
                # associated with this queue file.
                msg, msgdata = self.switchboard.dequeue(filebase)
            except Exception as error:
                # This used to just catch email.Errors.MessageParseError, but
                # other problems can occur in message parsing, e.g.
                # ValueError, and exceptions can occur in unpickling too.  We
                # don't want the runner to die, so we just log and skip this
                # entry, but preserve it for analysis.
                self._log(error)
                elog.error("Skipping and preserving unparseable message: %s", filebase)
                self.switchboard.finish(filebase, preserve=True)
                # Abort the database transaction before moving on; the
                # periodic hook and commit below are skipped for this entry.
                config.db.abort()
                continue
            try:
                dlog.debug("[%s] processing onefile", me)
                self._process_one_file(msg, msgdata)
                dlog.debug("[%s] finishing filebase: %s", me, filebase)
                self.switchboard.finish(filebase)
            except Exception as error:
                # All runners that implement _dispose() must guarantee that
                # exceptions are caught and dealt with properly.  Still, there
                # may be a bug in the infrastructure, and we do not want those
                # to cause messages to be lost.  Any uncaught exceptions will
                # cause the message to be stored in the shunt queue for human
                # intervention.
                self._log(error)
                # Put a marker in the metadata for unshunting.
                msgdata["whichq"] = self.switchboard.name
                # It is possible that shunting can throw an exception, e.g. a
                # permissions problem or a MemoryError due to a really large
                # message.  Try to be graceful.
                try:
                    shunt = config.switchboards["shunt"]
                    new_filebase = shunt.enqueue(msg, msgdata)
                    elog.error("SHUNTING: %s", new_filebase)
                    self.switchboard.finish(filebase)
                except Exception as error:
                    # The message wasn't successfully shunted.  Log the
                    # exception and try to preserve the original queue entry
                    # for possible analysis.
                    self._log(error)
                    elog.error("SHUNTING FAILED, preserving original entry: %s", filebase)
                    self.switchboard.finish(filebase, preserve=True)
                # Whether or not shunting worked, abort the transaction of
                # the failed processing attempt.
                config.db.abort()
            # Other work we want to do each time through the loop.
            dlog.debug("[%s] doing periodic", me)
            self._do_periodic()
            dlog.debug("[%s] committing transaction", me)
            config.db.commit()
            dlog.debug("[%s] checking short circuit", me)
            if self._short_circuit():
                dlog.debug("[%s] short circuiting", me)
                break
        dlog.debug("[%s] ending oneloop: %s", me, len(files))
        return len(files)

    def _process_one_file(self, msg, msgdata):
        """See `IRunner`.

        Find the mailing list named by ``msgdata['listname']`` and pass the
        message to `_dispose()` inside the language context chosen for it.

        If the ``listname`` key is absent, or the list manager has no list
        of that name, the message is logged and enqueued on the shunt
        switchboard and nothing else is done.

        :param msg: The message taken from the queue.
        :param msgdata: The message metadata; ``msgdata['lang']`` is set to
            the code of the chosen language before `_dispose()` is called.
        """
        # Do some common sanity checking on the message metadata.  It's got to
        # be destined for a particular mailing list.  This switchboard is used
        # to shunt off badly formatted messages.  We don't want to just trash
        # them because they may be fixable with human intervention.  Just get
        # them out of our sight.
        #
        # Find out which mailing list this message is destined for.  A private
        # sentinel tells a missing key apart from any stored value.
        missing = object()
        listname = msgdata.get("listname", missing)
        mlist = None if listname is missing else getUtility(IListManager).get(unicode(listname))
        if mlist is None:
            elog.error(
                '%s runner "%s" shunting message for missing list: %s',
                msg["message-id"],
                self.name,
                ("n/a" if listname is missing else listname),
            )
            config.switchboards["shunt"].enqueue(msg, msgdata)
            return
        # Now process this message.  We also want to set up the language
        # context for this message.  The context will be the preferred
        # language for the user if the sender is a member of the list, or it
        # will be the list's preferred language.  (mlist cannot be None here
        # because of the early return above, so no site-default fallback is
        # needed.)
        member = mlist.members.get_member(msg.sender) if msg.sender else None
        if member is None:
            language = mlist.preferred_language
        else:
            language = member.preferred_language
        # The with-block scopes the language to this one message, so that
        # subsequent messages are not translated incorrectly.
        with _.using(language.code):
            msgdata["lang"] = language.code
            keepqueued = self._dispose(mlist, msg, msgdata)
        # A true result from _dispose() asks for the message to be put back
        # on this runner's own queue.
        if keepqueued:
            self.switchboard.enqueue(msg, msgdata)

    def _log(self, exc):
        """Write an uncaught exception to the error log.

        Two records are emitted: a one-line summary of `exc`, then the
        formatted traceback of the exception currently being handled.

        :param exc: The exception to mention in the summary line.
        """
        elog.error("Uncaught runner exception: %s", exc)
        # format_exc() gives the same text print_exc() would write to a file.
        elog.error("%s", traceback.format_exc())

    def _clean_up(self):
        """See `IRunner`."""
        pass

    def _dispose(self, mlist, msg, msgdata):
        """See `IRunner`."""
        raise NotImplementedError

    def _do_periodic(self):
        """See `IRunner`."""
        pass

    def _snooze(self, filecnt):
        """See `IRunner`."""
        if filecnt or self.sleep_float <= 0:
            return
        time.sleep(self.sleep_float)

    def _short_circuit(self):
        """See `IRunner`."""
        return self._stop