def poll_newsgroup(mlist, conn, first, last, glock):
    listname = mlist.internal_name()
    # NEWNEWS is not portable and has synchronization issues.
    for num in range(first, last):
        glock.refresh()
        try:
            headers = conn.head(repr(num))[3]
            found_to = False
            beenthere = False
            for header in headers:
                i = header.find(':')
                value = header[:i].lower()
                if i > 0 and value == 'to':
                    found_to = True
                # FIXME 2010-02-16 barry use List-Post header.
                if value != 'x-beenthere':
                    continue
                if header[i:] == ': %s' % mlist.posting_address:
                    beenthere = True
                    break
            if not beenthere:
                body = conn.body(repr(num))[3]
                # Usenet originated messages will not have a Unix envelope
                # (i.e. "From " header).  This breaks Pipermail archiving, so
                # we will synthesize one.  Be sure to use the format searched
                # for by mailbox.UnixMailbox._isrealfromline().  BAW: We use
                # the -bounces address here in case any downstream clients use
                # the envelope sender for bounces; I'm not sure about this,
                # but it's the closest to the old semantics.
                lines = ['From %s %s' % (mlist.GetBouncesEmail(),
                                         time.ctime(time.time()))]
                lines.extend(headers)
                lines.append('')
                lines.extend(body)
                lines.append('')
                p = Parser(Message.Message)
                try:
                    msg = p.parsestr(NL.join(lines))
                except email.Errors.MessageError as e:
                    log.error('email package exception for %s:%d\n%s',
                              mlist.linked_newsgroup, num, e)
                    raise _ContinueLoop
                if found_to:
                    del msg['X-Originally-To']
                    msg['X-Originally-To'] = msg['To']
                    del msg['To']
                msg['To'] = mlist.posting_address
                # Post the message to the locked list
                inq = Switchboard(config.INQUEUE_DIR)
                inq.enqueue(msg, listid=mlist.list_id, fromusenet=True)
                log.info('posted to list %s: %7d', listname, num)
        except nntplib.NNTPError as e:
            log.exception('NNTP error for list %s: %7d', listname, num)
        except _ContinueLoop:
            continue
        # Even if we don't post the message because it was seen on the
        # list already, update the watermark
        mlist.usenet_watermark = num
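# Not part of the original source: a minimal, hypothetical sketch of how
# poll_newsgroup() above might be driven for a single gated list.  The NNTP
# host setting (config.NNTP_HOST) and the helper name are assumptions for
# illustration only.
def gate_one_newsgroup(mlist, glock):
    # Open an NNTP connection and select the list's linked newsgroup.
    conn = nntplib.NNTP(config.NNTP_HOST)
    try:
        # group() returns (response, count, first, last, name); first/last
        # are the article numbers currently available on the server.
        _resp, _count, first, last, _name = conn.group(mlist.linked_newsgroup)
        # Resume just past the last article already gated to the list.
        start = max((mlist.usenet_watermark or 0) + 1, int(first))
        poll_newsgroup(mlist, conn, start, int(last) + 1, glock)
    finally:
        conn.quit()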
def __init__(self):
    self._conf = {}
    self._load_conf()
    queue_directory = os.path.join(config.ARCHIVE_DIR, self.name, 'spool')
    self._switchboard = Switchboard(
        self.name, queue_directory, recover=False)
def __init__(self, name, slice=None):
    """Create a runner.

    :param slice: The slice number for this runner.  This is passed
        directly to the underlying `ISwitchboard` object.  This is ignored
        for runners that don't manage a queue.
    :type slice: int or None
    """
    # Grab the configuration section.
    self.name = name
    section = getattr(config, 'runner.' + name)
    substitutions = config.paths
    substitutions['name'] = name
    numslices = int(section.instances)
    # Check whether the runner is queue runner or not; non-queue runner
    # should not have queue_directory or switchboard instance.
    if self.is_queue_runner:
        self.queue_directory = expand(section.path, None, substitutions)
        self.switchboard = Switchboard(
            name, self.queue_directory, slice, numslices, True)
    else:
        self.queue_directory = None
        self.switchboard = None
    self.sleep_time = as_timedelta(section.sleep_time)
    # sleep_time is a timedelta; turn it into a float for time.sleep().
    self.sleep_float = (86400 * self.sleep_time.days
                        + self.sleep_time.seconds
                        + self.sleep_time.microseconds / 1.0e6)
    self.max_restarts = int(section.max_restarts)
    self.start = as_boolean(section.start)
    self._stop = False
    self.status = 0
def __init__(self, name, slice=None):
    """Create a runner.

    :param slice: The slice number for this runner.  This is passed
        directly to the underlying `ISwitchboard` object.  This is ignored
        for runners that don't manage a queue.
    :type slice: int or None
    """
    # Grab the configuration section.
    self.name = name
    section = getattr(config, 'runner.' + name)
    substitutions = config.paths
    substitutions['name'] = name
    numslices = int(section.instances)
    # Check whether the runner is queue runner or not; non-queue runner
    # should not have queue_directory or switchboard instance.
    if self.is_queue_runner:
        self.queue_directory = expand(section.path, substitutions)
        self.switchboard = Switchboard(
            name, self.queue_directory, slice, numslices, True)
    else:
        self.queue_directory = None
        self.switchboard = None
    self.sleep_time = as_timedelta(section.sleep_time)
    # sleep_time is a timedelta; turn it into a float for time.sleep().
    self.sleep_float = (86400 * self.sleep_time.days
                        + self.sleep_time.seconds
                        + self.sleep_time.microseconds / 1.0e6)
    self.max_restarts = int(section.max_restarts)
    self.start = as_boolean(section.start)
    self._stop = False
    self.status = 0
def _post_process(self):
    """Perform post-processing after loading the configuration files."""
    # Expand and set up all directories.
    self._expand_paths()
    # Set up the switchboards.  Import this here to avoid circular imports.
    from mailman.core.switchboard import Switchboard
    Switchboard.initialize()
    # Set up all the languages.
    languages = self._config.getByCategory('language', [])
    language_manager = getUtility(ILanguageManager)
    for language in languages:
        if language.enabled:
            code = language.name.split('.')[1]
            language_manager.add(
                code, language.charset, language.description)
    # The default language must always be available.
    assert self._config.mailman.default_language in language_manager, (
        'System default language code not defined: %s' %
        self._config.mailman.default_language)
    self.ensure_directories_exist()
    getUtility(IStyleManager).populate()
    # Set the default system language.
    from mailman.core.i18n import _
    _.default = self.mailman.default_language
def __init__(self, name, slice=None):
    """Create a runner.

    :param slice: The slice number for this runner.  This is passed
        directly to the underlying `ISwitchboard` object.  This is ignored
        for runners that don't manage a queue.
    :type slice: int or None
    """
    # Grab the configuration section.
    self.name = name
    section = getattr(config, "runner." + name)
    substitutions = config.paths
    substitutions["name"] = name
    self.queue_directory = expand(section.path, substitutions)
    numslices = int(section.instances)
    self.switchboard = Switchboard(
        name, self.queue_directory, slice, numslices, True)
    self.sleep_time = as_timedelta(section.sleep_time)
    # sleep_time is a timedelta; turn it into a float for time.sleep().
    self.sleep_float = (86400 * self.sleep_time.days
                        + self.sleep_time.seconds
                        + self.sleep_time.microseconds / 1.0e6)
    self.max_restarts = int(section.max_restarts)
    self.start = as_boolean(section.start)
    self._stop = False
class Runner:
    is_queue_runner = True

    def __init__(self, name, slice=None):
        """Create a runner.

        :param slice: The slice number for this runner.  This is passed
            directly to the underlying `ISwitchboard` object.  This is
            ignored for runners that don't manage a queue.
        :type slice: int or None
        """
        # Grab the configuration section.
        self.name = name
        section = getattr(config, 'runner.' + name)
        substitutions = config.paths
        substitutions['name'] = name
        numslices = int(section.instances)
        # Check whether the runner is queue runner or not; non-queue runner
        # should not have queue_directory or switchboard instance.
        if self.is_queue_runner:
            self.queue_directory = expand(section.path, substitutions)
            self.switchboard = Switchboard(
                name, self.queue_directory, slice, numslices, True)
        else:
            self.queue_directory = None
            self.switchboard = None
        self.sleep_time = as_timedelta(section.sleep_time)
        # sleep_time is a timedelta; turn it into a float for time.sleep().
        self.sleep_float = (86400 * self.sleep_time.days
                            + self.sleep_time.seconds
                            + self.sleep_time.microseconds / 1.0e6)
        self.max_restarts = int(section.max_restarts)
        self.start = as_boolean(section.start)
        self._stop = False
        self.status = 0

    def __repr__(self):
        return '<{0} at {1:#x}>'.format(self.__class__.__name__, id(self))

    def signal_handler(self, signum, frame):
        signame = {
            signal.SIGTERM: 'SIGTERM',
            signal.SIGINT: 'SIGINT',
            signal.SIGUSR1: 'SIGUSR1',
            }.get(signum, signum)
        if signum in (signal.SIGTERM, signal.SIGINT, signal.SIGUSR1):
            self.stop()
            self.status = signum
            rlog.info('%s runner caught %s.  Stopping.', self.name, signame)
        elif signum == signal.SIGHUP:
            reopen()
            rlog.info('%s runner caught SIGHUP.  Reopening logs.', self.name)

    def set_signals(self):
        """See `IRunner`."""
        signal.signal(signal.SIGHUP, self.signal_handler)
        signal.signal(signal.SIGINT, self.signal_handler)
        signal.signal(signal.SIGTERM, self.signal_handler)
        signal.signal(signal.SIGUSR1, self.signal_handler)

    def stop(self):
        """See `IRunner`."""
        self._stop = True

    def run(self):
        """See `IRunner`."""
        # Start the main loop for this runner.
        try:
            while True:
                # Once through the loop that processes all the files in the
                # queue directory.
                filecnt = self._one_iteration()
                # Do the periodic work for the subclass.
                self._do_periodic()
                # If the stop flag is set, we're done.
                if self._stop:
                    break
                # Give the runner an opportunity to snooze for a while, but
                # pass it the file count so it can decide whether to do more
                # work now or not.
                self._snooze(filecnt)
        except KeyboardInterrupt:
            pass
        finally:
            self._clean_up()

    def _one_iteration(self):
        """See `IRunner`."""
        me = self.__class__.__name__
        dlog.debug('[%s] starting oneloop', me)
        # List all the files in our queue directory.  The switchboard is
        # guaranteed to hand us the files in FIFO order.
        files = self.switchboard.files
        for filebase in files:
            dlog.debug('[%s] processing filebase: %s', me, filebase)
            try:
                # Ask the switchboard for the message and metadata objects
                # associated with this queue file.
                msg, msgdata = self.switchboard.dequeue(filebase)
            except Exception as error:
                # This used to just catch email.Errors.MessageParseError, but
                # other problems can occur in message parsing, e.g.
                # ValueError, and exceptions can occur in unpickling too.  We
                # don't want the runner to die, so we just log and skip this
                # entry, but preserve it for analysis.
                self._log(error)
                elog.error('Skipping and preserving unparseable message: %s',
                           filebase)
                self.switchboard.finish(filebase, preserve=True)
                config.db.abort()
                continue
            try:
                dlog.debug('[%s] processing onefile', me)
                self._process_one_file(msg, msgdata)
                dlog.debug('[%s] finishing filebase: %s', me, filebase)
                self.switchboard.finish(filebase)
            except Exception as error:
                # All runners that implement _dispose() must guarantee that
                # exceptions are caught and dealt with properly.  Still, there
                # may be a bug in the infrastructure, and we do not want those
                # to cause messages to be lost.  Any uncaught exceptions will
                # cause the message to be stored in the shunt queue for human
                # intervention.
                self._log(error)
                # Put a marker in the metadata for unshunting.
                msgdata['whichq'] = self.switchboard.name
                # It is possible that shunting can throw an exception, e.g. a
                # permissions problem or a MemoryError due to a really large
                # message.  Try to be graceful.
                try:
                    shunt = config.switchboards['shunt']
                    new_filebase = shunt.enqueue(msg, msgdata)
                    elog.error('SHUNTING: %s', new_filebase)
                    self.switchboard.finish(filebase)
                except Exception as error:
                    # The message wasn't successfully shunted.  Log the
                    # exception and try to preserve the original queue entry
                    # for possible analysis.
                    self._log(error)
                    elog.error(
                        'SHUNTING FAILED, preserving original entry: %s',
                        filebase)
                    self.switchboard.finish(filebase, preserve=True)
                    config.db.abort()
            # Other work we want to do each time through the loop.
            dlog.debug('[%s] doing periodic', me)
            self._do_periodic()
            dlog.debug('[%s] committing transaction', me)
            config.db.commit()
            dlog.debug('[%s] checking short circuit', me)
            if self._short_circuit():
                dlog.debug('[%s] short circuiting', me)
                break
        dlog.debug('[%s] ending oneloop: %s', me, len(files))
        return len(files)

    def _process_one_file(self, msg, msgdata):
        """See `IRunner`."""
        # Do some common sanity checking on the message metadata.  It's got to
        # be destined for a particular mailing list.  This switchboard is used
        # to shunt off badly formatted messages.  We don't want to just trash
        # them because they may be fixable with human intervention.  Just get
        # them out of our sight.
        #
        # Find out which mailing list this message is destined for.
        mlist = None
        missing = object()
        # First try to dig out the target list by id.  If there's no list-id
        # in the metadata, fall back to the fqdn list name for backward
        # compatibility.
        list_manager = getUtility(IListManager)
        list_id = msgdata.get('listid', missing)
        fqdn_listname = None
        if list_id is missing:
            fqdn_listname = msgdata.get('listname', missing)
            # XXX Deprecate.
            if fqdn_listname is not missing:
                mlist = list_manager.get(fqdn_listname)
        else:
            mlist = list_manager.get_by_list_id(list_id)
        if mlist is None:
            identifier = (list_id if list_id is not None else fqdn_listname)
            elog.error(
                '%s runner "%s" shunting message for missing list: %s',
                msg['message-id'], self.name, identifier)
            config.switchboards['shunt'].enqueue(msg, msgdata)
            return
        # Now process this message.  We also want to set up the language
        # context for this message.  The context will be the preferred
        # language for the user if the sender is a member of the list, or it
        # will be the list's preferred language.  However, we must take
        # special care to reset the defaults, otherwise subsequent messages
        # may be translated incorrectly.
        if mlist is None:
            language_manager = getUtility(ILanguageManager)
            language = language_manager[config.mailman.default_language]
        elif msg.sender:
            member = mlist.members.get_member(msg.sender)
            language = (member.preferred_language
                        if member is not None
                        else mlist.preferred_language)
        else:
            language = mlist.preferred_language
        with _.using(language.code):
            msgdata['lang'] = language.code
            try:
                keepqueued = self._dispose(mlist, msg, msgdata)
            except Exception as error:
                # Trigger the Zope event and re-raise
                notify(RunnerCrashEvent(self, mlist, msg, msgdata, error))
                raise
        if keepqueued:
            self.switchboard.enqueue(msg, msgdata)

    def _log(self, exc):
        elog.error('Uncaught runner exception: %s', exc)
        s = StringIO()
        traceback.print_exc(file=s)
        elog.error('%s', s.getvalue())

    def _clean_up(self):
        """See `IRunner`."""
        pass

    def _dispose(self, mlist, msg, msgdata):
        """See `IRunner`."""
        raise NotImplementedError

    def _do_periodic(self):
        """See `IRunner`."""
        pass

    def _snooze(self, filecnt):
        """See `IRunner`."""
        if filecnt or self.sleep_float <= 0:
            return
        time.sleep(self.sleep_float)

    def _short_circuit(self):
        """See `IRunner`."""
        return self._stop
class Runner:
    is_queue_runner = True

    def __init__(self, name, slice=None):
        """Create a runner.

        :param slice: The slice number for this runner.  This is passed
            directly to the underlying `ISwitchboard` object.  This is
            ignored for runners that don't manage a queue.
        :type slice: int or None
        """
        # Grab the configuration section.
        self.name = name
        section = getattr(config, 'runner.' + name)
        substitutions = config.paths
        substitutions['name'] = name
        numslices = int(section.instances)
        # Check whether the runner is queue runner or not; non-queue runner
        # should not have queue_directory or switchboard instance.
        if self.is_queue_runner:
            self.queue_directory = expand(section.path, None, substitutions)
            self.switchboard = Switchboard(
                name, self.queue_directory, slice, numslices, True)
        else:
            self.queue_directory = None
            self.switchboard = None
        self.sleep_time = as_timedelta(section.sleep_time)
        # sleep_time is a timedelta; turn it into a float for time.sleep().
        self.sleep_float = (86400 * self.sleep_time.days
                            + self.sleep_time.seconds
                            + self.sleep_time.microseconds / 1.0e6)
        self.max_restarts = int(section.max_restarts)
        self.start = as_boolean(section.start)
        self._stop = False
        self.status = 0

    def __repr__(self):
        return '<{} at {:#x}>'.format(self.__class__.__name__, id(self))

    def signal_handler(self, signum, frame):        # pragma: nocover
        signame = {
            signal.SIGTERM: 'SIGTERM',
            signal.SIGINT: 'SIGINT',
            signal.SIGUSR1: 'SIGUSR1',
            }.get(signum, signum)
        if signum == signal.SIGHUP:
            reopen()
            rlog.info('{} runner caught SIGHUP.  Reopening logs.'.format(
                self.name))
        elif signum in (signal.SIGTERM, signal.SIGINT, signal.SIGUSR1):
            self.stop()
            self.status = signum
            rlog.info('{} runner caught {}.  Stopping.'.format(
                self.name, signame))
            # As of Python 3.5, PEP 475 gets in our way.  Runners with long
            # time.sleep()'s in their _snooze() method (e.g. the retry
            # runner) will have their system call implemented time.sleep()
            # automatically retried at the C layer.  The only reliable way
            # to prevent this is to raise an exception in the signal
            # handler.  The standard run() method automatically suppresses
            # this exception, meaning, it's caught and ignored, but
            # effectively breaks the run() loop, which is just what we want.
            # Runners which implement their own run() method must be
            # prepared to catch RunnerInterrupts, usually also ignoring
            # them.
            raise RunnerInterrupt

    def set_signals(self):
        """See `IRunner`."""
        signal.signal(signal.SIGHUP, self.signal_handler)
        signal.signal(signal.SIGINT, self.signal_handler)
        signal.signal(signal.SIGTERM, self.signal_handler)
        signal.signal(signal.SIGUSR1, self.signal_handler)

    def stop(self):
        """See `IRunner`."""
        self._stop = True

    def run(self):
        """See `IRunner`."""
        # Start the main loop for this runner.
        with suppress(KeyboardInterrupt, RunnerInterrupt):
            while True:
                # Once through the loop that processes all the files in the
                # queue directory.
                filecnt = self._one_iteration()
                # Do the periodic work for the subclass.
                self._do_periodic()
                # If the stop flag is set, we're done.
                if self._stop:
                    break
                # Give the runner an opportunity to snooze for a while, but
                # pass it the file count so it can decide whether to do more
                # work now or not.
                self._snooze(filecnt)
        self._clean_up()

    def _one_iteration(self):
        """See `IRunner`."""
        me = self.__class__.__name__
        dlog.debug('[%s] starting oneloop', me)
        # List all the files in our queue directory.  The switchboard is
        # guaranteed to hand us the files in FIFO order.
        files = self.switchboard.files
        for filebase in files:
            dlog.debug('[%s] processing filebase: %s', me, filebase)
            try:
                # Ask the switchboard for the message and metadata objects
                # associated with this queue file.
                msg, msgdata = self.switchboard.dequeue(filebase)
            except Exception as error:
                # This used to just catch email.Errors.MessageParseError, but
                # other problems can occur in message parsing, e.g.
                # ValueError, and exceptions can occur in unpickling too.  We
                # don't want the runner to die, so we just log and skip this
                # entry, but preserve it for analysis.
                self._log(error)
                elog.error('Skipping and preserving unparseable message: %s',
                           filebase)
                self.switchboard.finish(filebase, preserve=True)
                config.db.abort()
                continue
            try:
                dlog.debug('[%s] processing onefile', me)
                self._process_one_file(msg, msgdata)
                dlog.debug('[%s] finishing filebase: %s', me, filebase)
                self.switchboard.finish(filebase)
            except Exception as error:
                # All runners that implement _dispose() must guarantee that
                # exceptions are caught and dealt with properly.  Still, there
                # may be a bug in the infrastructure, and we do not want those
                # to cause messages to be lost.  Any uncaught exceptions will
                # cause the message to be stored in the shunt queue for human
                # intervention.
                self._log(error)
                # Put a marker in the metadata for unshunting.
                msgdata['whichq'] = self.switchboard.name
                # It is possible that shunting can throw an exception, e.g. a
                # permissions problem or a MemoryError due to a really large
                # message.  Try to be graceful.
                try:
                    shunt = config.switchboards['shunt']
                    new_filebase = shunt.enqueue(msg, msgdata)
                    elog.error('SHUNTING: %s', new_filebase)
                    self.switchboard.finish(filebase)
                except Exception as error:
                    # The message wasn't successfully shunted.  Log the
                    # exception and try to preserve the original queue entry
                    # for possible analysis.
                    self._log(error)
                    elog.error(
                        'SHUNTING FAILED, preserving original entry: %s',
                        filebase)
                    self.switchboard.finish(filebase, preserve=True)
                    config.db.abort()
            # Other work we want to do each time through the loop.
            dlog.debug('[%s] doing periodic', me)
            self._do_periodic()
            dlog.debug('[%s] committing transaction', me)
            config.db.commit()
            dlog.debug('[%s] checking short circuit', me)
            if self._short_circuit():
                dlog.debug('[%s] short circuiting', me)
                break
        dlog.debug('[%s] ending oneloop: %s', me, len(files))
        return len(files)

    def _process_one_file(self, msg, msgdata):
        """See `IRunner`."""
        # Do some common sanity checking on the message metadata.  It's got to
        # be destined for a particular mailing list.  This switchboard is used
        # to shunt off badly formatted messages.  We don't want to just trash
        # them because they may be fixable with human intervention.  Just get
        # them out of our sight.
        #
        # Find out which mailing list this message is destined for.
        mlist = None
        missing = object()
        # First try to dig out the target list by id.  If there's no list-id
        # in the metadata, fall back to the fqdn list name for backward
        # compatibility.
        list_manager = getUtility(IListManager)
        list_id = msgdata.get('listid', missing)
        fqdn_listname = None
        if list_id is missing:
            fqdn_listname = msgdata.get('listname', missing)
            # XXX Deprecate.
            if fqdn_listname is not missing:
                mlist = list_manager.get(fqdn_listname)
        else:
            mlist = list_manager.get_by_list_id(list_id)
        if mlist is None:
            identifier = (list_id if list_id is not None else fqdn_listname)
            elog.error('%s runner "%s" shunting message for missing list: %s',
                       msg['message-id'], self.name, identifier)
            config.switchboards['shunt'].enqueue(msg, msgdata)
            return
        # Now process this message.  We also want to set up the language
        # context for this message.  The context will be the preferred
        # language for the user if the sender is a member of the list, or it
        # will be the list's preferred language.  However, we must take
        # special care to reset the defaults, otherwise subsequent messages
        # may be translated incorrectly.
        if mlist is None:
            language_manager = getUtility(ILanguageManager)
            language = language_manager[config.mailman.default_language]
        elif msg.sender:
            member = mlist.members.get_member(msg.sender)
            language = (member.preferred_language
                        if member is not None
                        else mlist.preferred_language)
        else:
            language = mlist.preferred_language
        with _.using(language.code):
            msgdata['lang'] = language.code
            try:
                keepqueued = self._dispose(mlist, msg, msgdata)
            except Exception as error:
                # Trigger the Zope event and re-raise
                notify(RunnerCrashEvent(self, mlist, msg, msgdata, error))
                raise
        if keepqueued:
            self.switchboard.enqueue(msg, msgdata)

    def _log(self, exc):
        elog.error('Uncaught runner exception: %s', exc)
        s = StringIO()
        traceback.print_exc(file=s)
        elog.error('%s', s.getvalue())

    def _clean_up(self):
        """See `IRunner`."""
        pass

    def _dispose(self, mlist, msg, msgdata):
        """See `IRunner`."""
        raise NotImplementedError

    def _do_periodic(self):
        """See `IRunner`."""
        pass

    def _snooze(self, filecnt):
        """See `IRunner`."""
        if filecnt or self.sleep_float <= 0:
            return
        time.sleep(self.sleep_float)

    def _short_circuit(self):
        """See `IRunner`."""
        return self._stop
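# Not part of the original source: a minimal, hypothetical sketch of a
# concrete runner built on the Runner base class above.  Subclasses override
# _dispose(); returning False means the message is fully handled, returning
# True asks _process_one_file() to re-enqueue it for another pass.
class LoggingRunner(Runner):
    """Toy runner that only logs each message it dequeues."""

    def _dispose(self, mlist, msg, msgdata):
        dlog.debug('saw message %s for list %s',
                   msg['message-id'], mlist.list_id)
        # Nothing left to do with this message; drop it from the queue.
        return False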
class Archiver(object):

    name = "hyperkitty"

    def __init__(self):
        self._conf = {}
        self._load_conf()
        queue_directory = os.path.join(config.ARCHIVE_DIR, self.name, 'spool')
        self._switchboard = Switchboard(
            self.name, queue_directory, recover=False)

    @property
    def base_url(self):
        return self._conf["base_url"]

    @property
    def api_key(self):
        return self._conf["api_key"]

    def _load_conf(self):
        """
        Find the location of the HyperKitty-specific config file from
        Mailman's main config file and load the values.
        """
        # Read our specific configuration file
        archiver_config = external_configuration(
            config.archiver.hyperkitty.configuration)
        for option in ("base_url", ):
            url = archiver_config.get("general", option)
            if not url.endswith("/"):
                url += "/"
            self._conf[option] = url
        self._conf["api_key"] = archiver_config.get("general", "api_key")

    def _get_url(self, mlist, params):
        params.update({"key": self.api_key})
        url = urljoin(self.base_url, "api/mailman/urls")
        result = requests.get(url, params=params)
        if result.status_code != 200:
            logger.error("HyperKitty failure on %s: %s (%s)",
                         url, result.text, result.status_code)
            return ""
        try:
            result = result.json()
        except ValueError as e:
            logger.exception("Invalid response from HyperKitty on %s: %s",
                             url, e)
            return ""
        return result["url"]

    def list_url(self, mlist):
        """Return the url to the top of the list's archive.

        :param mlist: The IMailingList object.
        :returns: The url string.
        """
        return self._get_url(mlist, {"mlist": mlist.fqdn_listname})

    def permalink(self, mlist, msg):
        """Return the url to the message in the archive.

        This url points directly to the message in the archive.  This method
        only calculates the url, it does not actually archive the message.

        :param mlist: The IMailingList object.
        :param msg: The message object.
        :returns: The url string or None if the message's archive url cannot
            be calculated.
        """
        msg_id = msg['Message-Id'].strip().strip("<>")
        return self._get_url(mlist, {
            "mlist": mlist.fqdn_listname,
            "msgid": msg_id,
            })

    def archive_message(self, mlist, msg):
        """
        Send the message to the archiver, but process the queue first if it
        contains any held messages.

        :param mlist: The IMailingList object.
        :param msg: The message object.
        :returns: The url string or None if the message's archive url cannot
            be calculated.
        """
        self.process_queue()
        return self._archive_message(mlist, msg)

    def _archive_message(self, mlist, msg, from_filebase=None):
        """Send the message to the archiver.

        If an exception occurs, queue the message for later retry.

        :param mlist: The IMailingList object.
        :param msg: The message object.
        :param from_filebase: If the message already comes from the retry
            queue, set the queue filebase here and it will be properly
            removed on success, or stored for analysis on error.
        :returns: The url string or None if the message's archive url cannot
            be calculated.
        """
        try:
            url = self._send_message(mlist, msg)
            if from_filebase is not None:
                self._switchboard.finish(from_filebase)
            return url
        except Exception as error:
            # Archiving failed, send the message to the queue.
            _log_error(error)
            # Enqueuing can throw an exception, e.g. a permissions problem
            # or a MemoryError due to a really large message.  Try to be
            # graceful.
            try:
                self._switchboard.enqueue(msg, mlist=mlist)
                if from_filebase is not None:
                    self._switchboard.finish(from_filebase)
            except Exception as error:
                # The message wasn't successfully enqueued.
                _log_error(error)
                logger.error(
                    'queuing failed on mailing-list %s for message %s',
                    mlist.list_id, msg['Message-Id'].strip())
                if from_filebase is not None:
                    # Try to preserve the original queue entry for possible
                    # analysis.
                    self._switchboard.finish(from_filebase, preserve=True)

    def _send_message(self, mlist, msg):
        """Send the message to the archiver over HTTP.

        :param mlist: The IMailingList object.
        :param msg: The message object.
        :returns: The url string or None if the message's archive url cannot
            be calculated.
        """
        logger.debug('%s archiver: sending message %s',
                     self.name, msg['Message-Id'].strip())
        url = urljoin(self.base_url, "api/mailman/archive")
        try:
            message_text = msg.as_string()
        except (MessageError, KeyError) as error:
            logger.error('Could not render the message with id %s to text: %s',
                         msg['Message-Id'].strip(), error)
            return  # permanent error, don't raise
        try:
            result = requests.post(
                url, params={"key": self.api_key},
                data={"mlist": mlist.fqdn_listname},
                files={"message": ("message.txt", message_text)})
        except requests.exceptions.RequestException as error:
            logger.error('Connection to HyperKitty failed: %s', error)
            raise
        if result.status_code != 200:
            logger.error("HyperKitty failure on %s: %s (%s)",
                         url, result.text, result.status_code)
            raise ValueError(result.text)
        try:
            result = result.json()
        except ValueError as e:
            logger.exception("Invalid response from HyperKitty on %s: %s",
                             url, e)
            raise
        archived_url = result["url"]
        logger.info("HyperKitty archived message %s to %s",
                    msg['Message-Id'].strip(), archived_url)
        return archived_url

    def process_queue(self):
        """Go through the queue of held messages to archive and send them to
        HyperKitty.  If the archiving is successful, remove them from the
        queue, otherwise re-enqueue them.
        """
        self._switchboard.recover_backup_files()
        files = self._switchboard.files
        for filebase in files:
            logger.debug('HyperKitty archiver processing queued filebase: %s',
                         filebase)
            try:
                # Ask the switchboard for the message and metadata objects
                # associated with this queue file.
                msg, msgdata = self._switchboard.dequeue(filebase)
            except Exception as error:
                # We don't want the process to die here or no further email
                # can be archived, so we just log and skip the entry, but
                # preserve it for analysis.
                _log_error(error)
                logger.error('Skipping and preserving unparseable message: %s',
                             filebase)
                self._switchboard.finish(filebase, preserve=True)
                continue
            mlist = msgdata["mlist"]
            self._archive_message(mlist, msg, from_filebase=filebase)
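# Not part of the original source: a minimal, hypothetical sketch showing how
# the two public entry points of the Archiver above fit together.  The mlist
# and msg arguments are assumed to be an IMailingList and an email Message
# carrying a Message-Id header.
def archive_and_report(mlist, msg):
    archiver = Archiver()
    # archive_message() first drains any previously queued (failed) messages,
    # then posts this one to HyperKitty over HTTP.
    url = archiver.archive_message(mlist, msg)
    if url is None:
        # A None return means the send failed and the message was queued in
        # the archiver's spool directory for a later retry.
        logger.warning('message %s queued for a later retry',
                       msg['Message-Id'])
    return url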
class Runner:
    intercept_signals = True

    def __init__(self, name, slice=None):
        """Create a runner.

        :param slice: The slice number for this runner.  This is passed
            directly to the underlying `ISwitchboard` object.  This is
            ignored for runners that don't manage a queue.
        :type slice: int or None
        """
        # Grab the configuration section.
        self.name = name
        section = getattr(config, "runner." + name)
        substitutions = config.paths
        substitutions["name"] = name
        self.queue_directory = expand(section.path, substitutions)
        numslices = int(section.instances)
        self.switchboard = Switchboard(
            name, self.queue_directory, slice, numslices, True)
        self.sleep_time = as_timedelta(section.sleep_time)
        # sleep_time is a timedelta; turn it into a float for time.sleep().
        self.sleep_float = (86400 * self.sleep_time.days
                            + self.sleep_time.seconds
                            + self.sleep_time.microseconds / 1.0e6)
        self.max_restarts = int(section.max_restarts)
        self.start = as_boolean(section.start)
        self._stop = False

    def __repr__(self):
        return "<{0} at {1:#x}>".format(self.__class__.__name__, id(self))

    def stop(self):
        """See `IRunner`."""
        self._stop = True

    def run(self):
        """See `IRunner`."""
        # Start the main loop for this runner.
        try:
            while True:
                # Once through the loop that processes all the files in the
                # queue directory.
                filecnt = self._one_iteration()
                # Do the periodic work for the subclass.
                self._do_periodic()
                # If the stop flag is set, we're done.
                if self._stop:
                    break
                # Give the runner an opportunity to snooze for a while, but
                # pass it the file count so it can decide whether to do more
                # work now or not.
                self._snooze(filecnt)
        except KeyboardInterrupt:
            pass
        finally:
            self._clean_up()

    def _one_iteration(self):
        """See `IRunner`."""
        me = self.__class__.__name__
        dlog.debug("[%s] starting oneloop", me)
        # List all the files in our queue directory.  The switchboard is
        # guaranteed to hand us the files in FIFO order.
        files = self.switchboard.files
        for filebase in files:
            dlog.debug("[%s] processing filebase: %s", me, filebase)
            try:
                # Ask the switchboard for the message and metadata objects
                # associated with this queue file.
                msg, msgdata = self.switchboard.dequeue(filebase)
            except Exception as error:
                # This used to just catch email.Errors.MessageParseError, but
                # other problems can occur in message parsing, e.g.
                # ValueError, and exceptions can occur in unpickling too.  We
                # don't want the runner to die, so we just log and skip this
                # entry, but preserve it for analysis.
                self._log(error)
                elog.error("Skipping and preserving unparseable message: %s",
                           filebase)
                self.switchboard.finish(filebase, preserve=True)
                config.db.abort()
                continue
            try:
                dlog.debug("[%s] processing onefile", me)
                self._process_one_file(msg, msgdata)
                dlog.debug("[%s] finishing filebase: %s", me, filebase)
                self.switchboard.finish(filebase)
            except Exception as error:
                # All runners that implement _dispose() must guarantee that
                # exceptions are caught and dealt with properly.  Still, there
                # may be a bug in the infrastructure, and we do not want those
                # to cause messages to be lost.  Any uncaught exceptions will
                # cause the message to be stored in the shunt queue for human
                # intervention.
                self._log(error)
                # Put a marker in the metadata for unshunting.
                msgdata["whichq"] = self.switchboard.name
                # It is possible that shunting can throw an exception, e.g. a
                # permissions problem or a MemoryError due to a really large
                # message.  Try to be graceful.
                try:
                    shunt = config.switchboards["shunt"]
                    new_filebase = shunt.enqueue(msg, msgdata)
                    elog.error("SHUNTING: %s", new_filebase)
                    self.switchboard.finish(filebase)
                except Exception as error:
                    # The message wasn't successfully shunted.  Log the
                    # exception and try to preserve the original queue entry
                    # for possible analysis.
                    self._log(error)
                    elog.error(
                        "SHUNTING FAILED, preserving original entry: %s",
                        filebase)
                    self.switchboard.finish(filebase, preserve=True)
                    config.db.abort()
            # Other work we want to do each time through the loop.
            dlog.debug("[%s] doing periodic", me)
            self._do_periodic()
            dlog.debug("[%s] committing transaction", me)
            config.db.commit()
            dlog.debug("[%s] checking short circuit", me)
            if self._short_circuit():
                dlog.debug("[%s] short circuiting", me)
                break
        dlog.debug("[%s] ending oneloop: %s", me, len(files))
        return len(files)

    def _process_one_file(self, msg, msgdata):
        """See `IRunner`."""
        # Do some common sanity checking on the message metadata.  It's got to
        # be destined for a particular mailing list.  This switchboard is used
        # to shunt off badly formatted messages.  We don't want to just trash
        # them because they may be fixable with human intervention.  Just get
        # them out of our sight.
        #
        # Find out which mailing list this message is destined for.
        missing = object()
        listname = msgdata.get("listname", missing)
        mlist = (None if listname is missing
                 else getUtility(IListManager).get(unicode(listname)))
        if mlist is None:
            elog.error(
                '%s runner "%s" shunting message for missing list: %s',
                msg["message-id"], self.name,
                ("n/a" if listname is missing else listname))
            config.switchboards["shunt"].enqueue(msg, msgdata)
            return
        # Now process this message.  We also want to set up the language
        # context for this message.  The context will be the preferred
        # language for the user if the sender is a member of the list, or it
        # will be the list's preferred language.  However, we must take
        # special care to reset the defaults, otherwise subsequent messages
        # may be translated incorrectly.
        if mlist is None:
            language_manager = getUtility(ILanguageManager)
            language = language_manager[config.mailman.default_language]
        elif msg.sender:
            member = mlist.members.get_member(msg.sender)
            language = (member.preferred_language
                        if member is not None
                        else mlist.preferred_language)
        else:
            language = mlist.preferred_language
        with _.using(language.code):
            msgdata["lang"] = language.code
            keepqueued = self._dispose(mlist, msg, msgdata)
        if keepqueued:
            self.switchboard.enqueue(msg, msgdata)

    def _log(self, exc):
        elog.error("Uncaught runner exception: %s", exc)
        s = StringIO()
        traceback.print_exc(file=s)
        elog.error("%s", s.getvalue())

    def _clean_up(self):
        """See `IRunner`."""
        pass

    def _dispose(self, mlist, msg, msgdata):
        """See `IRunner`."""
        raise NotImplementedError

    def _do_periodic(self):
        """See `IRunner`."""
        pass

    def _snooze(self, filecnt):
        """See `IRunner`."""
        if filecnt or self.sleep_float <= 0:
            return
        time.sleep(self.sleep_float)

    def _short_circuit(self):
        """See `IRunner`."""
        return self._stop