class Redirector(Greenlet):
    """Greenlet that listens on a local address and forwards each accepted
    client connection to a configured remote address via a Linker greenlet.

    NOTE(review): assumes ``msg`` carries ``sock_type``, ``local``,
    ``remote`` and ``protocol`` attributes, with ``local``/``remote`` being
    (host, port) tuples -- confirm against the message producer.
    """

    def __init__(self, msg):
        self.sock_type = msg.sock_type
        self.info = msg
        # Listening socket; SO_REUSEADDR lets the address be rebound
        # quickly after a restart.
        self.sock = socket.socket(socket.AF_INET, self.sock_type)
        self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # One Linker greenlet per accepted client lives in this group.
        self.links = Group()
        super(Redirector, self).__init__()

    def _run(self):
        # Accept loop; runs until this greenlet is killed.
        self.sock.bind(self.info.local)
        self.sock.listen(64)
        while True:
            cli, addr = self.sock.accept()
            self.links.start(Linker(self.info.remote, self.sock_type, cli))

    def kill(self):
        # Tear down client linkers first, then stop the accept loop, and
        # close the listening socket last so no new clients sneak in.
        self.links.kill()
        super(Redirector, self).kill()
        self.sock.close()

    def dump(self):
        """Return a JSON-serializable summary of this redirection."""
        return dict(protocol=self.info.protocol,
                    local='%s:%d' % self.info.local,
                    remote='%s:%d' % self.info.remote)
def main(workers):
    """Boot the app and run ``workers`` SubmitWorker greenlets against a
    shared submit queue, blocking until all workers finish."""
    create_app(config=CONFIG_MODULE, config_logger=False)
    submit_queue = SubmitQueue()
    pool = Group()
    for _ in range(workers):
        pool.start(SubmitWorker(submit_queue))
    pool.join()
def start_node_monitor(self):
    """Spawn one monitor greenlet per node runner and return a greenlet
    that waits on all of them, propagating any monitor failure."""

    def _monitor(runner: NodeRunner):
        # Poll until the scenario's root task completes; only runners that
        # have actually started are health-checked.
        while not self._runner.root_task.done:
            if runner.state is NodeState.STARTED:
                runner.check()
            gevent.sleep(0.5)

    monitor_group = Group()
    for runner in self._node_runners:
        monitor_group.start(Greenlet(_monitor, runner))

    def _wait():
        # Join with a short timeout so raise_error=True can surface monitor
        # exceptions promptly instead of blocking indefinitely.
        while not monitor_group.join(0.5, raise_error=True):
            pass

    return gevent.spawn(_wait)
def start_node_monitor(self):
    """Spawn one monitor greenlet per node runner and return a greenlet
    that waits on all of them, converting subprocess death into a
    ScenarioError."""

    def _monitor(runner: NodeRunner):
        # Poll until the scenario's root task completes; only started
        # runners have a live subprocess to check.
        while not self._runner.root_task.done:
            if runner.state is NodeState.STARTED:
                try:
                    runner.executor.check_subprocess()
                except ProcessExitedWithError as ex:
                    # Re-raise with scenario context, chaining the cause.
                    raise ScenarioError(
                        f'Raiden node {runner._index} died '
                        f'with non-zero exit status',
                    ) from ex
            gevent.sleep(.5)

    monitor_group = Group()
    for runner in self._node_runners:
        monitor_group.start(Greenlet(_monitor, runner))

    def _wait():
        # Join with a short timeout so raise_error=True surfaces monitor
        # exceptions promptly.
        while not monitor_group.join(.5, raise_error=True):
            pass

    return gevent.spawn(_wait)
class ImapSyncMonitor(BaseMailSyncMonitor):
    """
    Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    """

    def __init__(self, account, heartbeat=1, refresh_frequency=30):
        self.refresh_frequency = refresh_frequency
        # Serializes folder-sync critical sections for this account.
        self.syncmanager_lock = BoundedSemaphore(1)
        # Last folder listing seen on the remote; used to skip re-saving
        # Folder rows when nothing changed.
        self.saved_remote_folders = None
        self.sync_engine_class = FolderSyncEngine
        # Group of per-folder sync greenlets.
        self.folder_monitors = Group()
        self.delete_handler = None
        BaseMailSyncMonitor.__init__(self, account, heartbeat)

    @retry_crispin
    def prepare_sync(self):
        """
        Gets and save Folder objects for folders on the IMAP backend. Returns
        a list of folder names for the folders we want to sync (in order).
        """
        with connection_pool(self.account_id).get() as crispin_client:
            # Get a fresh list of the folder names from the remote
            remote_folders = crispin_client.folders()
            # The folders we should be syncing
            sync_folders = crispin_client.sync_folders()

            if self.saved_remote_folders != remote_folders:
                with session_scope(self.namespace_id) as db_session:
                    self.save_folder_names(db_session, remote_folders)
                    self.saved_remote_folders = remote_folders
        return sync_folders

    def save_folder_names(self, db_session, raw_folders):
        """
        Save the folders present on the remote backend for an account.

        * Create Folder objects.
        * Delete Folders that no longer exist on the remote.

        Notes
        -----
        Generic IMAP uses folders (not labels).
        Canonical folders ('inbox') and other folders are created as Folder
        objects only accordingly.

        We don't canonicalize folder names to lowercase when saving because
        different backends may be case-sensitive or otherwise - code that
        references saved folder names should canonicalize if needed when
        doing comparisons.
        """
        account = db_session.query(Account).get(self.account_id)
        remote_folder_names = {
            sanitize_name(f.display_name) for f in raw_folders
        }

        assert "inbox" in {
            f.role for f in raw_folders
        }, "Account {} has no detected inbox folder".format(
            account.email_address)

        local_folders = {
            f.name: f
            for f in db_session.query(Folder).filter(
                Folder.account_id == self.account_id)
        }

        # Delete folders no longer present on the remote.
        # Note that the folder with canonical_name='inbox' cannot be deleted;
        # remote_folder_names will always contain an entry corresponding to
        # it.
        discard = set(local_folders) - remote_folder_names
        for name in discard:
            log.info("Folder deleted from remote",
                     account_id=self.account_id, name=name)
            # Remove the associated Category row (if any) before dropping
            # the local bookkeeping entry.
            if local_folders[name].category_id is not None:
                cat = db_session.query(Category).get(
                    local_folders[name].category_id)
                if cat is not None:
                    db_session.delete(cat)
            del local_folders[name]

        # Create new folders
        for raw_folder in raw_folders:
            folder = Folder.find_or_create(db_session, account,
                                           raw_folder.display_name,
                                           raw_folder.role)
            # Keep the stored canonical name in sync with the remote role.
            if folder.canonical_name != raw_folder.role:
                folder.canonical_name = raw_folder.role

        # Set the should_run bit for existing folders to True (it's True by
        # default for new ones.)
        for f in local_folders.values():
            if f.imapsyncstatus:
                f.imapsyncstatus.sync_should_run = True

        db_session.commit()

    def start_new_folder_sync_engines(self):
        """Start a sync engine for each folder that needs one, reusing any
        monitor that is already running for that folder."""
        running_monitors = {
            monitor.folder_name: monitor for monitor in self.folder_monitors
        }
        for folder_name in self.prepare_sync():
            if folder_name in running_monitors:
                thread = running_monitors[folder_name]
            else:
                log.info(
                    "Folder sync engine started",
                    account_id=self.account_id,
                    folder_name=folder_name,
                )
                thread = self.sync_engine_class(
                    self.account_id,
                    self.namespace_id,
                    folder_name,
                    self.email_address,
                    self.provider_name,
                    self.syncmanager_lock,
                )
                self.folder_monitors.start(thread)
            # Block until the engine reaches steady-state polling or dies.
            while thread.state != "poll" and not thread.ready():
                sleep(self.heartbeat)
            if thread.ready():
                log.info(
                    "Folder sync engine exited",
                    account_id=self.account_id,
                    folder_name=folder_name,
                    error=thread.exception,
                )

    def start_delete_handler(self):
        # Idempotent: only one DeleteHandler per monitor.
        if self.delete_handler is None:
            self.delete_handler = DeleteHandler(
                account_id=self.account_id,
                namespace_id=self.namespace_id,
                provider_name=self.provider_name,
                uid_accessor=lambda m: m.imapuids,
            )
            self.delete_handler.start()

    def sync(self):
        """Main entry: start handlers, then periodically look for new
        folders; auth failures mark the account invalid and stop sync."""
        try:
            self.start_delete_handler()
            self.start_new_folder_sync_engines()
            while True:
                sleep(self.refresh_frequency)
                self.start_new_folder_sync_engines()
        except ValidationError as exc:
            log.error(
                "Error authenticating; stopping sync",
                exc_info=True,
                account_id=self.account_id,
                logstash_tag="mark_invalid",
            )
            with session_scope(self.namespace_id) as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.mark_invalid()
                account.update_sync_error(exc)
class ImapSyncMonitor(BaseMailSyncMonitor):
    """
    Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    poll_frequency: Integer
        Seconds to wait between polling for the greenlets spawned
    refresh_flags_max: Integer
        the maximum number of UIDs for which we'll check flags
        periodically.
    """

    def __init__(self, account, heartbeat=1, refresh_frequency=30,
                 poll_frequency=30, retry_fail_classes=[],
                 refresh_flags_max=2000):
        # NOTE(review): mutable default `retry_fail_classes=[]` is shared
        # across instances -- confirm no caller mutates it.
        self.refresh_frequency = refresh_frequency
        self.poll_frequency = poll_frequency
        self.syncmanager_lock = BoundedSemaphore(1)
        self.refresh_flags_max = refresh_flags_max
        # Last folder listing seen on the remote; used to skip re-saving
        # Folder rows when nothing changed.
        self.saved_remote_folders = None

        # Prefer the CONDSTORE engine when either the provider or the
        # account advertises CONDSTORE support.
        provider_supports_condstore = account.provider_info.get(
            'condstore', False)
        account_supports_condstore = getattr(account, 'supports_condstore',
                                             False)
        if provider_supports_condstore or account_supports_condstore:
            self.sync_engine_class = CondstoreFolderSyncEngine
        else:
            self.sync_engine_class = FolderSyncEngine

        # Group of per-folder sync greenlets.
        self.folder_monitors = Group()

        BaseMailSyncMonitor.__init__(self, account, heartbeat,
                                     retry_fail_classes)

    @retry_crispin
    def prepare_sync(self):
        """
        Gets and save Folder objects for folders on the IMAP backend. Returns
        a list of tuples (folder_name, folder_id) for each folder we want to
        sync (in order).
        """
        with mailsync_session_scope() as db_session:
            with connection_pool(self.account_id).get() as crispin_client:
                # Get a fresh list of the folder names from the remote
                remote_folders = crispin_client.folders()
                if self.saved_remote_folders != remote_folders:
                    self.save_folder_names(db_session, remote_folders)
                    self.saved_remote_folders = remote_folders
                # The folders we should be syncing
                sync_folders = crispin_client.sync_folders()

            sync_folder_names_ids = []
            for folder_name in sync_folders:
                try:
                    # Tuple-unpack enforces exactly one matching row.
                    id_, = db_session.query(Folder.id).\
                        filter(Folder.name == folder_name,
                               Folder.account_id == self.account_id).one()
                    sync_folder_names_ids.append((folder_name, id_))
                except NoResultFound:
                    log.error('Missing Folder object when starting sync',
                              folder_name=folder_name)
                    raise MailsyncError(
                        "Missing Folder '{}' on account {}".format(
                            folder_name, self.account_id))
            return sync_folder_names_ids

    def save_folder_names(self, db_session, raw_folders):
        """
        Save the folders present on the remote backend for an account.

        * Create Folder objects.
        * Delete Folders that no longer exist on the remote.

        Notes
        -----
        Generic IMAP uses folders (not labels).
        Canonical folders ('inbox') and other folders are created as Folder
        objects only accordingly.

        We don't canonicalize folder names to lowercase when saving because
        different backends may be case-sensitive or otherwise - code that
        references saved folder names should canonicalize if needed when
        doing comparisons.
        """
        account = db_session.query(Account).get(self.account_id)
        # Names are truncated the same way Folder stores them, so the
        # set comparison below matches DB contents.
        remote_folder_names = {
            f.display_name.rstrip()[:MAX_FOLDER_NAME_LENGTH]
            for f in raw_folders
        }

        assert 'inbox' in {f.role for f in raw_folders},\
            'Account {} has no detected inbox folder'.\
            format(account.email_address)

        local_folders = {
            f.name: f
            for f in db_session.query(Folder).filter(
                Folder.account_id == self.account_id)
        }

        # Delete folders no longer present on the remote.
        # Note that the folder with canonical_name='inbox' cannot be deleted;
        # remote_folder_names will always contain an entry corresponding to
        # it.
        discard = set(local_folders) - remote_folder_names
        for name in discard:
            log.info('Folder deleted from remote',
                     account_id=self.account_id, name=name)
            db_session.delete(local_folders[name])
            del local_folders[name]

        # Create new folders
        for raw_folder in raw_folders:
            Folder.find_or_create(db_session, account,
                                  raw_folder.display_name, raw_folder.role)

        # Set the should_run bit for existing folders to True (it's True by
        # default for new ones.)
        for f in local_folders.values():
            if f.imapsyncstatus:
                f.imapsyncstatus.sync_should_run = True

        db_session.commit()

    def start_new_folder_sync_engines(self, folders=set()):
        # NOTE(review): `folders` doubles as caller-owned shared state;
        # sync() always passes its own set, so the mutable default is only
        # hit on direct calls -- confirm that is intended.
        new_folders = [f for f in self.prepare_sync() if f not in folders]
        for folder_name, folder_id in new_folders:
            log.info('Folder sync engine started',
                     account_id=self.account_id,
                     folder_id=folder_id,
                     folder_name=folder_name)
            thread = self.sync_engine_class(
                self.account_id, folder_name, folder_id, self.email_address,
                self.provider_name, self.poll_frequency,
                self.syncmanager_lock, self.refresh_flags_max,
                self.retry_fail_classes)
            self.folder_monitors.start(thread)
            # Wait until the engine reaches polling, finishes, or dies.
            while not thread_polling(thread) and \
                    not thread_finished(thread) and \
                    not thread.ready():
                sleep(self.heartbeat)

            # allow individual folder sync monitors to shut themselves down
            # after completing the initial sync
            if thread_finished(thread) or thread.ready():
                if thread.exception:
                    # Exceptions causing the folder sync to exit should not
                    # clear the heartbeat.
                    log.info('Folder sync engine exited with error',
                             account_id=self.account_id,
                             folder_id=folder_id,
                             folder_name=folder_name,
                             error=thread.exception)
                else:
                    log.info('Folder sync engine finished',
                             account_id=self.account_id,
                             folder_id=folder_id,
                             folder_name=folder_name)
                    # clear the heartbeat for this folder-thread since it
                    # exited cleanly.
                    clear_heartbeat_status(self.account_id, folder_id)
                # note: thread is automatically removed from
                # self.folder_monitors
            else:
                folders.add((folder_name, folder_id))

    def start_delete_handler(self):
        self.delete_handler = DeleteHandler(
            account_id=self.account_id,
            namespace_id=self.namespace_id,
            uid_accessor=lambda m: m.imapuids)
        self.delete_handler.start()

    def sync(self):
        """Main entry: start handlers, then periodically look for new
        folders; auth failures mark the account invalid and stop sync."""
        try:
            self.start_delete_handler()
            folders = set()
            self.start_new_folder_sync_engines(folders)
            while True:
                sleep(self.refresh_frequency)
                self.start_new_folder_sync_engines(folders)
        except ValidationError as exc:
            log.error('Error authenticating; stopping sync', exc_info=True,
                      account_id=self.account_id,
                      logstash_tag='mark_invalid')
            with mailsync_session_scope() as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.mark_invalid()
                account.update_sync_error(str(exc))
def start(self, greenlet=None):
    """Add ``greenlet`` to the pool and start it.

    Calling without a greenlet is a no-op that returns None; otherwise the
    greenlet is handed to Group.start and its result returned.
    """
    if greenlet is None:
        return None
    return Group.start(self, greenlet)
def _run(self, *args, **kwargs):
    """Run every registered task in its own greenlet and wait for all of
    them to finish, re-raising the first error encountered."""
    pool = Group()
    for job in self._tasks:
        pool.start(Greenlet(job))
    pool.join(raise_error=True)
class ImapSyncMonitor(BaseMailSyncMonitor):
    """
    Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    poll_frequency: Integer
        Seconds to wait between polling for the greenlets spawned
    refresh_flags_max: Integer
        the maximum number of UIDs for which we'll check flags
        periodically.
    """

    def __init__(self, account, heartbeat=1, refresh_frequency=30,
                 poll_frequency=30, retry_fail_classes=[],
                 refresh_flags_max=2000):
        # NOTE(review): mutable default `retry_fail_classes=[]` is shared
        # across instances -- confirm no caller mutates it.
        self.refresh_frequency = refresh_frequency
        self.poll_frequency = poll_frequency
        self.syncmanager_lock = BoundedSemaphore(1)
        self.refresh_flags_max = refresh_flags_max
        # Last folder listing seen on the remote; lets prepare_sync skip
        # re-saving Folder rows when nothing changed.
        self.saved_remote_folders = None

        # Prefer the CONDSTORE engine when either the provider or the
        # account advertises CONDSTORE support.
        provider_supports_condstore = account.provider_info.get('condstore',
                                                                False)
        account_supports_condstore = getattr(account, 'supports_condstore',
                                             False)
        if provider_supports_condstore or account_supports_condstore:
            self.sync_engine_class = CondstoreFolderSyncEngine
        else:
            self.sync_engine_class = FolderSyncEngine

        # Group of per-folder sync greenlets.
        self.folder_monitors = Group()

        BaseMailSyncMonitor.__init__(self, account, heartbeat,
                                     retry_fail_classes)

    @retry_crispin
    def prepare_sync(self):
        """
        Gets and save Folder objects for folders on the IMAP backend. Returns
        a list of tuples (folder_name, folder_id) for each folder we want to
        sync (in order).
        """
        with mailsync_session_scope() as db_session:
            with connection_pool(self.account_id).get() as crispin_client:
                # Get a fresh list of the folder names from the remote
                remote_folders = crispin_client.folders()
                if self.saved_remote_folders != remote_folders:
                    self.save_folder_names(db_session, remote_folders)
                    self.saved_remote_folders = remote_folders
                # The folders we should be syncing
                sync_folders = crispin_client.sync_folders()

            sync_folder_names_ids = []
            for folder_name in sync_folders:
                try:
                    # Tuple-unpack enforces exactly one matching row.
                    id_, = db_session.query(Folder.id).\
                        filter(Folder.name == folder_name,
                               Folder.account_id == self.account_id).one()
                    sync_folder_names_ids.append((folder_name, id_))
                except NoResultFound:
                    log.error('Missing Folder object when starting sync',
                              folder_name=folder_name)
                    raise MailsyncError("Missing Folder '{}' on account {}"
                                        .format(folder_name,
                                                self.account_id))
            return sync_folder_names_ids

    def save_folder_names(self, db_session, raw_folders):
        """
        Save the folders present on the remote backend for an account.

        * Create Folder objects.
        * Delete Folders that no longer exist on the remote.

        Notes
        -----
        Generic IMAP uses folders (not labels).
        Canonical folders ('inbox') and other folders are created as Folder
        objects only accordingly.

        We don't canonicalize folder names to lowercase when saving because
        different backends may be case-sensitive or otherwise - code that
        references saved folder names should canonicalize if needed when
        doing comparisons.
        """
        account = db_session.query(Account).get(self.account_id)
        # Names are truncated the same way Folder stores them, so the set
        # comparison below matches DB contents.
        remote_folder_names = {f.display_name.rstrip()
                               [:MAX_FOLDER_NAME_LENGTH]
                               for f in raw_folders}

        assert 'inbox' in {f.role for f in raw_folders},\
            'Account {} has no detected inbox folder'.\
            format(account.email_address)

        local_folders = {f.name: f for f in db_session.query(Folder).filter(
            Folder.account_id == self.account_id)}

        # Delete folders no longer present on the remote.
        # Note that the folder with canonical_name='inbox' cannot be deleted;
        # remote_folder_names will always contain an entry corresponding to
        # it.
        discard = set(local_folders) - remote_folder_names
        for name in discard:
            log.info('Folder deleted from remote',
                     account_id=self.account_id, name=name)
            db_session.delete(local_folders[name])
            del local_folders[name]

        # Create new folders
        for raw_folder in raw_folders:
            Folder.find_or_create(db_session, account,
                                  raw_folder.display_name, raw_folder.role)

        # Set the should_run bit for existing folders to True (it's True by
        # default for new ones.)
        for f in local_folders.values():
            if f.imapsyncstatus:
                f.imapsyncstatus.sync_should_run = True

        db_session.commit()

    def start_new_folder_sync_engines(self, folders=set()):
        # NOTE(review): `folders` doubles as caller-owned shared state;
        # sync() always passes its own set, so the mutable default is only
        # hit on direct calls -- confirm that is intended.
        new_folders = [f for f in self.prepare_sync() if f not in folders]
        for folder_name, folder_id in new_folders:
            log.info('Folder sync engine started',
                     account_id=self.account_id,
                     folder_id=folder_id,
                     folder_name=folder_name)
            thread = self.sync_engine_class(self.account_id,
                                            folder_name,
                                            folder_id,
                                            self.email_address,
                                            self.provider_name,
                                            self.poll_frequency,
                                            self.syncmanager_lock,
                                            self.refresh_flags_max,
                                            self.retry_fail_classes)
            self.folder_monitors.start(thread)
            # Wait until the engine reaches polling, finishes, or dies.
            while not thread_polling(thread) and \
                    not thread_finished(thread) and \
                    not thread.ready():
                sleep(self.heartbeat)

            # allow individual folder sync monitors to shut themselves down
            # after completing the initial sync
            if thread_finished(thread) or thread.ready():
                if thread.exception:
                    # Exceptions causing the folder sync to exit should not
                    # clear the heartbeat.
                    log.info('Folder sync engine exited with error',
                             account_id=self.account_id,
                             folder_id=folder_id,
                             folder_name=folder_name,
                             error=thread.exception)
                else:
                    log.info('Folder sync engine finished',
                             account_id=self.account_id,
                             folder_id=folder_id,
                             folder_name=folder_name)
                    # clear the heartbeat for this folder-thread since it
                    # exited cleanly.
                    clear_heartbeat_status(self.account_id, folder_id)
                # note: thread is automatically removed from
                # self.folder_monitors
            else:
                folders.add((folder_name, folder_id))

    def start_delete_handler(self):
        self.delete_handler = DeleteHandler(
            account_id=self.account_id,
            namespace_id=self.namespace_id,
            uid_accessor=lambda m: m.imapuids)
        self.delete_handler.start()

    def sync(self):
        """Main entry: start handlers, then periodically look for new
        folders; auth failures mark the account invalid and stop sync."""
        try:
            self.start_delete_handler()
            folders = set()
            self.start_new_folder_sync_engines(folders)
            while True:
                sleep(self.refresh_frequency)
                self.start_new_folder_sync_engines(folders)
        except ValidationError as exc:
            log.error(
                'Error authenticating; stopping sync', exc_info=True,
                account_id=self.account_id, logstash_tag='mark_invalid')
            with mailsync_session_scope() as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.mark_invalid()
                account.update_sync_error(str(exc))
class ImapSyncMonitor(BaseMailSyncMonitor):
    """
    Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
        (DEPRECATED)
    refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    syncback_frequency: Integer
        Seconds to wait between performing consecutive syncback iterations
        and checking for new folders to sync.
    """

    def __init__(self, account, heartbeat=1, refresh_frequency=30,
                 syncback_frequency=5):
        # DEPRECATED.
        # TODO[k]: Remove after sync-syncback integration deploy is
        # complete.
        self.refresh_frequency = refresh_frequency
        # Serializes folder-sync critical sections for this account.
        self.syncmanager_lock = BoundedSemaphore(1)
        # Last folder listing seen on the remote; lets prepare_sync skip
        # re-saving Folder rows when nothing changed.
        self.saved_remote_folders = None
        self.sync_engine_class = FolderSyncEngine
        # Group of per-folder sync greenlets.
        self.folder_monitors = Group()
        self.delete_handler = None
        self.syncback_handler = None
        # Per-folder Events: set means the folder sync may proceed,
        # cleared means it is paused so syncback can run.
        self.folder_sync_signals = {}
        self.syncback_timestamp = None
        self.syncback_frequency = syncback_frequency
        BaseMailSyncMonitor.__init__(self, account, heartbeat)

    @retry_crispin
    def prepare_sync(self):
        """
        Gets and save Folder objects for folders on the IMAP backend. Returns
        a list of folder names for the folders we want to sync (in order).
        """
        with connection_pool(self.account_id).get() as crispin_client:
            # Get a fresh list of the folder names from the remote
            remote_folders = crispin_client.folders()
            # The folders we should be syncing
            sync_folders = crispin_client.sync_folders()

            if self.saved_remote_folders != remote_folders:
                with session_scope(self.namespace_id) as db_session:
                    self.save_folder_names(db_session, remote_folders)
                    self.saved_remote_folders = remote_folders
        return sync_folders

    def save_folder_names(self, db_session, raw_folders):
        """
        Save the folders present on the remote backend for an account.

        * Create Folder objects.
        * Delete Folders that no longer exist on the remote.

        Notes
        -----
        Generic IMAP uses folders (not labels).
        Canonical folders ('inbox') and other folders are created as Folder
        objects only accordingly.

        We don't canonicalize folder names to lowercase when saving because
        different backends may be case-sensitive or otherwise - code that
        references saved folder names should canonicalize if needed when
        doing comparisons.
        """
        account = db_session.query(Account).get(self.account_id)
        # Names are truncated the same way Folder stores them, so the set
        # comparison below matches DB contents.
        remote_folder_names = {
            f.display_name.rstrip()[:MAX_FOLDER_NAME_LENGTH]
            for f in raw_folders
        }

        assert 'inbox' in {f.role for f in raw_folders},\
            'Account {} has no detected inbox folder'.\
            format(account.email_address)

        local_folders = {
            f.name: f
            for f in db_session.query(Folder).filter(
                Folder.account_id == self.account_id)
        }

        # Delete folders no longer present on the remote.
        # Note that the folder with canonical_name='inbox' cannot be deleted;
        # remote_folder_names will always contain an entry corresponding to
        # it.
        discard = set(local_folders) - remote_folder_names
        for name in discard:
            log.info('Folder deleted from remote',
                     account_id=self.account_id, name=name)
            # Remove the associated Category row (if any) first.
            if local_folders[name].category_id is not None:
                cat = db_session.query(Category).get(
                    local_folders[name].category_id)
                if cat is not None:
                    db_session.delete(cat)
            del local_folders[name]

        # Create new folders
        for raw_folder in raw_folders:
            Folder.find_or_create(db_session, account,
                                  raw_folder.display_name, raw_folder.role)

        # Set the should_run bit for existing folders to True (it's True by
        # default for new ones.)
        for f in local_folders.values():
            if f.imapsyncstatus:
                f.imapsyncstatus.sync_should_run = True

        db_session.commit()

    def start_new_folder_sync_engines(self):
        """Start a sync engine (and optional S3 resync engine) for each
        folder that needs one, reusing already-running monitors."""
        running_monitors = {
            monitor.folder_name: monitor for monitor in self.folder_monitors
        }
        with session_scope(self.namespace_id) as db_session:
            account = db_session.query(Account).options(
                load_only('_sync_status')).get(self.account_id)
            s3_resync = account._sync_status.get('s3_resync', False)

        for folder_name in self.prepare_sync():
            if folder_name in running_monitors:
                thread = running_monitors[folder_name]
            else:
                log.info('Folder sync engine started',
                         account_id=self.account_id,
                         folder_name=folder_name)
                # Register the pause/resume signal before the engine starts
                # so it can observe it from its first iteration.
                self._add_sync_signal(folder_name)
                thread = self.sync_engine_class(
                    self.account_id, self.namespace_id, folder_name,
                    self.email_address, self.provider_name,
                    self.syncmanager_lock,
                    self.folder_sync_signals[folder_name])
                self.folder_monitors.start(thread)
                if s3_resync:
                    log.info('Starting an S3 monitor',
                             account_id=self.account_id)
                    s3_thread = S3FolderSyncEngine(self.account_id,
                                                  self.namespace_id,
                                                  folder_name,
                                                  self.email_address,
                                                  self.provider_name,
                                                  self.syncmanager_lock,
                                                  None)
                    self.folder_monitors.start(s3_thread)
            # Wait for steady-state polling (or exit), giving syncback a
            # chance to run on each heartbeat of the initial sync.
            while not thread.state == 'poll' and not thread.ready():
                sleep(self.heartbeat)
                self.perform_syncback()
            if thread.ready():
                # BUG FIX: was `self._remove_sync_signal[folder_name]`,
                # which indexes the bound method and raises TypeError;
                # the method must be *called* to drop the folder's signal.
                self._remove_sync_signal(folder_name)
                log.info('Folder sync engine exited',
                         account_id=self.account_id,
                         folder_name=folder_name,
                         error=thread.exception)

    def start_delete_handler(self):
        # Idempotent: only one DeleteHandler per monitor.
        if self.delete_handler is None:
            self.delete_handler = DeleteHandler(
                account_id=self.account_id,
                namespace_id=self.namespace_id,
                provider_name=self.provider_name,
                uid_accessor=lambda m: m.imapuids)
            self.delete_handler.start()

    def perform_syncback(self):
        """
        Perform syncback for the account.

        Syncback is performed iff all folder syncs are paused, and the
        previous syncback occurred more than syncback_frequency seconds ago.
        The first condition is checked by the call to _can_syncback();
        the second condition is needed because if there are a large number
        of pending actions during initial sync, it could repeatedly get
        interrupted and put on hold for seconds at a time.
        """
        from inbox.syncback.base import SyncbackHandler
        if not self._can_syncback():
            log.info('Skipping syncback', reason='folder syncs running')
            return
        # NOTE(review): timedelta.seconds ignores the days component, so
        # this comparison wraps after a >24h gap -- total_seconds() would
        # be strictly correct; left as-is to preserve behavior.
        if (self.syncback_timestamp and
                (datetime.utcnow() - self.syncback_timestamp).seconds <
                self.syncback_frequency):
            log.info('Skipping syncback',
                     reason='last syncback < syncback_frequency seconds ago',
                     syncback_frequency=self.syncback_frequency)
            # Reset here so syncs can proceed
            self._signal_syncs()
            return
        if self.syncback_handler is None:
            self.syncback_handler = SyncbackHandler(self.account_id,
                                                    self.namespace_id,
                                                    self.provider_name)
        try:
            interval = ((datetime.utcnow() -
                         self.syncback_timestamp).seconds
                        if self.syncback_timestamp else None)
            log.info('Performing syncback',
                     syncback_interval_in_seconds=interval)
            self.syncback_handler.send_client_changes()
            self.syncback_timestamp = datetime.utcnow()
        except Exception:
            # Log, set self.folder_sync_signals and then re-raise (so the
            # greenlet can be restarted etc.)
            log.error('Critical syncback error', exc_info=True)
            raise
        finally:
            # Reset here so syncs can proceed
            self._signal_syncs()

    def sync(self):
        """Main entry: start handlers, then periodically run syncback and
        look for new folders; auth failures mark the account invalid."""
        try:
            self.start_delete_handler()
            self.start_new_folder_sync_engines()
            while True:
                sleep(self.syncback_frequency)
                self.perform_syncback()
                self.start_new_folder_sync_engines()
        except ValidationError as exc:
            log.error('Error authenticating; stopping sync', exc_info=True,
                      account_id=self.account_id,
                      logstash_tag='mark_invalid')
            with session_scope(self.namespace_id) as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.mark_invalid()
                account.update_sync_error(str(exc))

    def _add_sync_signal(self, folder_name):
        """Create the folder's pause/resume Event, initially set (running)."""
        self.folder_sync_signals[folder_name] = Event()
        self.folder_sync_signals[folder_name].set()

    def _remove_sync_signal(self, folder_name):
        """Drop the folder's pause/resume Event once its engine exits."""
        del self.folder_sync_signals[folder_name]

    def _can_syncback(self):
        """
        Determine if syncback can occur.

        If all folder syncs are paused as indicated by the
        folder_sync_signals, returns True. Else, returns False.
        """
        return (not self.folder_sync_signals or
                all(not signal.is_set() for signal in
                    self.folder_sync_signals.values()))

    def _signal_syncs(self):
        """Indicate that folder syncs can resume."""
        for signal in self.folder_sync_signals.values():
            signal.set()
class Consumer(object):
    """Gevent-based Kinesis stream consumer: spawns one greenlet per shard,
    buckets records, and runs user-registered transform / after-consume /
    teardown hooks."""

    #: The debug flag
    #:
    #: This attribute can also be configured from the config with the
    #: ``DEBUG`` configuration key. Defaults to ``False``.
    debug: ConfigAttribute = ConfigAttribute('DEBUG')

    #: A :class:`~datetime.timedelta` which is used as
    #: shard iterator interval.
    #:
    #: This attribute can also be configured from the config with
    #: ``SHARD_ITERATOR_INTERVAL`` configuration key. Defaults to
    #: ``timedelta(seconds=1)``
    shard_iterator_interval: ConfigAttribute = ConfigAttribute(
        'SHARD_ITERATOR_INTERVAL', get_converter=_make_timedelta
    )

    #: A :class:`~datetime.timedelta` which is used as
    #: shard monitoring interval.
    #:
    #: This attribute can also be configured from the config with
    #: ``SHARD_MONITORING_INTERVAL`` configuration key. Defaults to
    #: ``timedelta(hours=1)``
    shard_monitoring_interval: ConfigAttribute = ConfigAttribute(
        'SHARD_MONITORING_INTERVAL', get_converter=_make_timedelta
    )

    #: A :class:`~datetime.timedelta` which is used as overhang interval.
    #:
    #: This attribute can also be configured from the config with
    #: ``PROTRACTOR_OVERHANG_INTERVAL`` configuration key. Defaults to
    #: ``timedelta(seconds=30)``
    protractor_overhang_interval: ConfigAttribute = ConfigAttribute(
        'PROTRACTOR_OVERHANG_INTERVAL', get_converter=_make_timedelta
    )

    #: Default configuration parameters.
    __default_config: ImmutableDict = ImmutableDict({
        'DEBUG': False,
        'STREAM_REGION': 'ap-south-1',
        'STREAM_NAME': None,
        'SHARD_ITERATOR_TYPE': 'TRIM_HORIZON',
        'SHARD_READ_LIMIT': 50,
        'SHARD_ITERATOR_INTERVAL': timedelta(seconds=1),
        'SHARD_MONITORING_INTERVAL': timedelta(hours=1),
        'PROTRACTOR_ENABLE': False,
        'PROTRACTOR_OVERHANG_INTERVAL': timedelta(seconds=30),
        'LOGGER_HANDLER_POLICY': 'always',
        'LOG_ROLLOVER': 'd',
        'LOG_INTERVAL': 1,
        'LOG_BACKUP_COUNT': 2,
        'BUCKET_SIZE_LIMIT': 10000,
        'BUCKET_COUNT_LIMIT': 120,
    })

    #: The name of the package or module that this consumer belongs to.
    #: Do not change this once it is set by the constructor.
    import_name: str = None

    #: Absolute path to the package on the filesystem.
    root_path: str = None

    def __init__(self, import_name: str, root_path: str = None,
                 stream_region: str = None, stream_name: str = None,
                 log_folder: str = 'log',
                 checkpointer: Checkpointer = None) -> None:
        self.import_name = import_name
        # Derive the package root from the import name unless given.
        if root_path is None:
            root_path = _get_root_path(import_name)
        self.root_path = root_path
        self.log_folder = log_folder

        #: The configuration directory as :class:`Config`.
        self.config = Config(self.root_path, self.__default_config)
        # Constructor arguments override config defaults.
        if stream_region is not None:
            self.config['STREAM_REGION'] = stream_region
        if stream_name is not None:
            self.config['STREAM_NAME'] = stream_name

        #: Checkpoint store; falls back to an in-memory implementation.
        self.checkpointer = checkpointer
        if self.checkpointer is None:
            self.checkpointer = InMemoryCheckpointer()

        #: A list of functions that will be called at the bucket is full.
        self.__transform_funcs = []

        #: A list of functions that should be called after transform.
        self.__after_consume_func = []

        #: A list of functions that are called when the consumer context
        #: is destroyed. Since the consumer context is also torn down when
        #: the consumer exits, these functions run at shutdown.
        self.__teardown_consumer_func = []

        #: Group of shard-reader greenlets (plus the shard monitor).
        self.__threads = Group()
        self.shards = set()

    @locked_cached_property
    def name(self) -> str:
        # Mirrors Flask's app-name logic: scripts run as __main__ are
        # named after their file.
        if self.import_name == '__main__':
            fn = getattr(sys.modules['__main__'], '__file__', None)
            if fn is None:
                return '__main__'
            return os.path.splitext(os.path.basename(fn))[0]
        return self.import_name

    @locked_cached_property
    def logger(self) -> _Logger:
        return create_logger(self)

    @locked_cached_property
    def kinesis_client(self):
        return boto3.client('kinesis',
                            region_name=self.config['STREAM_REGION'])

    @typechecked
    def transform(self, func: Callable[[List[Any], str, str, datetime],
                                       List[Any]]) -> Callable:
        """Decorator: register a record-transform hook."""
        self.__transform_funcs.append(func)
        return func

    @typechecked
    def after_consume(self, func: Callable[[Optional[List[Any]], str,
                                            Optional[str],
                                            Optional[datetime]],
                                           None]) -> Callable:
        """Decorator: register a hook invoked after transform."""
        self.__after_consume_func.append(func)
        return func

    @typechecked
    def teardown_consumer(self, func: Callable[[Any], None]) -> Callable:
        """Decorator: register a hook invoked at consumer teardown."""
        self.__teardown_consumer_func.append(func)
        return func

    @typechecked
    def do_transform(self, data: List[Any], shard_id: str,
                     last_sequence_number: str,
                     last_arrival_timestamp: datetime) -> List[Any]:
        # Hooks run in reverse registration order, each feeding the next.
        for func in reversed(self.__transform_funcs):
            data = func(
                data, shard_id, last_sequence_number, last_arrival_timestamp
            )
        return data

    @typechecked
    def do_after_consume(self, data: Optional[List[Any]],
                         shard_id: str,
                         last_sequence_number: Optional[str],
                         last_arrival_timestamp: Optional[datetime]) -> None:
        for func in reversed(self.__after_consume_func):
            func(
                data, shard_id, last_sequence_number, last_arrival_timestamp
            )

    @typechecked
    def do_teardown_consumer(self, exc=_sentinel) -> None:
        # Default to the in-flight exception when none is supplied.
        if exc is _sentinel:
            exc = sys.exc_info()[1]
        for func in reversed(self.__teardown_consumer_func):
            func(exc)

    def handle_shard_exception(self, e) -> None:
        exc_type, exc_value, tb = sys.exc_info()
        assert exc_value is e
        self.log_exception((exc_type, exc_value, tb))

    def handle_exception(self, e) -> None:
        exc_type, exc_value, tb = sys.exc_info()
        self.log_exception((exc_type, exc_value, tb))

    def log_exception(self, exc_info) -> None:
        # Include the shard id when the error happened in shard context.
        if has_shard_context():
            self.logger.error(
                'Exception on {0}'.format(current_shard.id),
                exc_info=exc_info
            )
        else:
            self.logger.error(
                'Exception', exc_info=exc_info
            )

    def get_context(self) -> ConsumerContext:
        return ConsumerContext(self)

    def get_stream(self) -> KinesisStream:
        return KinesisStream(self.kinesis_client.describe_stream(
            StreamName=self.config['STREAM_NAME']
        ))

    def dispatch(self) -> None:
        """Spawn a greenlet per shard plus a shard monitor and block until
        they all finish; exits when the stream is not ACTIVE."""
        stream = self.get_stream()
        if stream.status == 'ACTIVE':
            # NOTE(review): gevent.signal as a function is deprecated in
            # newer gevent (use gevent.signal_handler) -- confirm the
            # pinned gevent version supports this form.
            gevent.signal(signal.SIGQUIT, gevent.killall)
            shards = stream.get_shards(self)
            for shard in shards:
                self.spawn_shard(shard)
            self.__threads.start(ShardMonitor(self))
            self.__threads.join()
        else:
            sys.exit()

    def spawn_shard(self, shard: KinesisShard) -> None:
        self.__threads.start(shard)
        self.shards.add(shard)

    def close_shard(self, shard: KinesisShard) -> None:
        self.logger.warn('Stream \'{0}\' Shard \'{1}\' closed'.format(
            self.config['STREAM_NAME'], shard.id
        ))
        self.shards.remove(shard)

    def process(self, debug=None) -> None:
        """Run the consumer inside a pushed ConsumerContext; any dispatch
        error is logged and handed to the context on pop."""
        if debug is not None:
            self.debug = bool(debug)
        ctx = self.get_context()
        error = None
        try:
            try:
                ctx.push()
                self.dispatch()
            except Exception as e:
                error = e
                self.handle_exception(e)
        finally:
            # Teardown hooks receive the captured error (or None).
            ctx.pop(error)

    def __repr__(self) -> str:
        return '<{0!s} {1!r} - \'{2!s}\'>'.format(
            self.__class__.__name__,
            self.name,
            self.config['STREAM_NAME']
        )
def start(self,greenlet=None): """Start the greenlet pool or add a greenlet to the pool.""" if greenlet is not None: return Group.start(self,greenlet)
class ImapSyncMonitor(BaseMailSyncMonitor):
    """ Top-level controller for an account's mail sync. Spawns individual FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    poll_frequency: Integer
        Seconds to wait between polling for the greenlets spawned
    refresh_flags_max: Integer
        the maximum number of UIDs for which we'll check flags periodically.
    """
    def __init__(self, account, heartbeat=1, refresh_frequency=30,
                 poll_frequency=30, retry_fail_classes=None,
                 refresh_flags_max=2000):
        # BUGFIX: default was a shared mutable list ([]); use a None sentinel
        # so each instance gets its own list.
        if retry_fail_classes is None:
            retry_fail_classes = []
        self.refresh_frequency = refresh_frequency
        self.poll_frequency = poll_frequency
        self.syncmanager_lock = BoundedSemaphore(1)
        self.refresh_flags_max = refresh_flags_max

        provider_supports_condstore = account.provider_info.get(
            'condstore', False)
        account_supports_condstore = getattr(account, 'supports_condstore',
                                             False)
        if provider_supports_condstore or account_supports_condstore:
            self.sync_engine_class = CondstoreFolderSyncEngine
        else:
            self.sync_engine_class = FolderSyncEngine

        self.folder_monitors = Group()

        BaseMailSyncMonitor.__init__(self, account, heartbeat,
                                     retry_fail_classes)

    @retry_crispin
    def prepare_sync(self):
        """Ensures that canonical tags are created for the account, and gets
        and save Folder objects for folders on the IMAP backend. Returns a
        list of tuples (folder_name, folder_id) for each folder we want to
        sync (in order)."""
        with mailsync_session_scope() as db_session:
            with _pool(self.account_id).get() as crispin_client:
                sync_folders = crispin_client.sync_folders()
                save_folder_names(log, self.account_id,
                                  crispin_client.folder_names(), db_session)
                sync_folder_names_ids = []
                for folder_name in sync_folders:
                    try:
                        id_, = db_session.query(Folder.id).\
                            filter(Folder.name == folder_name,
                                   Folder.account_id == self.account_id).one()
                        sync_folder_names_ids.append((folder_name, id_))
                    except NoResultFound:
                        log.error("Missing Folder object when starting sync",
                                  folder_name=folder_name)
                        raise MailsyncError(
                            "Missing Folder '{}' on account {}".format(
                                folder_name, self.account_id))
                return sync_folder_names_ids

    def start_new_folder_sync_engines(self, folders=None):
        """Start sync engines for folders not already tracked in *folders*;
        (folder_name, folder_id) pairs still syncing are added to *folders*.
        """
        # BUGFIX: default was a shared mutable set() that this method mutates
        # via folders.add(...); use a None sentinel instead.
        if folders is None:
            folders = set()
        new_folders = [f for f in self.prepare_sync() if f not in folders]
        for folder_name, folder_id in new_folders:
            log.info('Folder sync engine started',
                     account_id=self.account_id,
                     folder_id=folder_id,
                     folder_name=folder_name)
            thread = self.sync_engine_class(
                self.account_id, folder_name, folder_id, self.email_address,
                self.provider_name, self.poll_frequency,
                self.syncmanager_lock, self.refresh_flags_max,
                self.retry_fail_classes)
            self.folder_monitors.start(thread)
            while not thread_polling(thread) and \
                    not thread_finished(thread) and \
                    not thread.ready():
                sleep(self.heartbeat)

            # allow individual folder sync monitors to shut themselves down
            # after completing the initial sync
            if thread_finished(thread) or thread.ready():
                log.info('Folder sync engine finished/killed',
                         account_id=self.account_id,
                         folder_id=folder_id,
                         folder_name=folder_name)
                # note: thread is automatically removed from
                # self.folder_monitors
            else:
                folders.add((folder_name, folder_id))

    def start_delete_handler(self):
        """Start the greenlet that expunges deleted messages."""
        self.delete_handler = DeleteHandler(account_id=self.account_id,
                                            namespace_id=self.namespace_id,
                                            uid_accessor=lambda m: m.imapuids)
        self.delete_handler.start()

    def sync(self):
        """Start the delete handler, then periodically look for new folders
        to sync."""
        self.start_delete_handler()
        folders = set()
        self.start_new_folder_sync_engines(folders)
        while True:
            sleep(self.refresh_frequency)
            self.start_new_folder_sync_engines(folders)
def main(workers): queue = SubmitQueue() worker_group = Group() for _ in range(workers): worker_group.start(SubmitWorker(queue)) worker_group.join()
class ImapSyncMonitor(BaseMailSyncMonitor):
    """ Top-level controller for an account's mail sync. Spawns individual FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    """
    def __init__(self, account, heartbeat=1, refresh_frequency=30):
        self.refresh_frequency = refresh_frequency
        self.syncmanager_lock = BoundedSemaphore(1)
        self.saved_remote_folders = None
        self.sync_engine_class = FolderSyncEngine
        self.folder_monitors = Group()
        self.delete_handler = None

        BaseMailSyncMonitor.__init__(self, account, heartbeat)

    @retry_crispin
    def prepare_sync(self):
        """
        Gets and save Folder objects for folders on the IMAP backend. Returns a
        list of folder names for the folders we want to sync (in order).
        """
        with connection_pool(self.account_id).get() as crispin_client:
            # Get a fresh list of the folder names from the remote
            remote_folders = crispin_client.folders()
            # The folders we should be syncing
            sync_folders = crispin_client.sync_folders()

            if self.saved_remote_folders != remote_folders:
                with session_scope(self.namespace_id) as db_session:
                    self.save_folder_names(db_session, remote_folders)
                    self.saved_remote_folders = remote_folders
            return sync_folders

    def save_folder_names(self, db_session, raw_folders):
        """
        Save the folders present on the remote backend for an account.

        * Create Folder objects.
        * Delete Folders that no longer exist on the remote.

        Notes
        -----
        Generic IMAP uses folders (not labels).
        Canonical folders ('inbox') and other folders are created as Folder
        objects only accordingly.

        We don't canonicalize folder names to lowercase when saving because
        different backends may be case-sensitive or otherwise - code that
        references saved folder names should canonicalize if needed when
        doing comparisons.
        """
        account = db_session.query(Account).get(self.account_id)
        remote_folder_names = {f.display_name.rstrip()[:MAX_FOLDER_NAME_LENGTH]
                               for f in raw_folders}

        assert 'inbox' in {f.role for f in raw_folders},\
            'Account {} has no detected inbox folder'.\
            format(account.email_address)

        local_folders = {f.name: f for f in db_session.query(Folder).filter(
                         Folder.account_id == self.account_id)}

        # Delete folders no longer present on the remote.
        # Note that the folder with canonical_name='inbox' cannot be deleted;
        # remote_folder_names will always contain an entry corresponding to it.
        discard = set(local_folders) - remote_folder_names
        for name in discard:
            log.info('Folder deleted from remote', account_id=self.account_id,
                     name=name)
            # BUGFIX: guard against a null category_id before Query.get();
            # the sibling ImapSyncMonitor variant in this file already has
            # this check.
            if local_folders[name].category_id is not None:
                cat = db_session.query(Category).get(
                    local_folders[name].category_id)
                if cat is not None:
                    db_session.delete(cat)
            del local_folders[name]

        # Create new folders
        for raw_folder in raw_folders:
            Folder.find_or_create(db_session, account,
                                  raw_folder.display_name, raw_folder.role)

        # Set the should_run bit for existing folders to True (it's True by
        # default for new ones.)
        for f in local_folders.values():
            if f.imapsyncstatus:
                f.imapsyncstatus.sync_should_run = True

        db_session.commit()

    def start_new_folder_sync_engines(self):
        """Start a sync engine for every folder that should sync and does not
        already have a running monitor; block until each engine reaches the
        polling state (or dies)."""
        running_monitors = {monitor.folder_name: monitor for monitor in
                            self.folder_monitors}
        for folder_name in self.prepare_sync():
            if folder_name in running_monitors:
                thread = running_monitors[folder_name]
            else:
                log.info('Folder sync engine started',
                         account_id=self.account_id,
                         folder_name=folder_name)
                thread = self.sync_engine_class(self.account_id,
                                                self.namespace_id,
                                                folder_name,
                                                self.email_address,
                                                self.provider_name,
                                                self.syncmanager_lock)
                self.folder_monitors.start(thread)
            while not thread_polling(thread) and not thread.ready():
                sleep(self.heartbeat)
            if thread.ready():
                log.info('Folder sync engine exited',
                         account_id=self.account_id,
                         folder_name=folder_name,
                         error=thread.exception)

    def start_delete_handler(self):
        """Start the delete handler exactly once."""
        if self.delete_handler is None:
            self.delete_handler = DeleteHandler(
                account_id=self.account_id,
                namespace_id=self.namespace_id,
                provider_name=self.provider_name,
                uid_accessor=lambda m: m.imapuids)
            self.delete_handler.start()

    def sync(self):
        """Run the sync loop; on auth failure mark the account invalid and
        stop."""
        try:
            self.start_delete_handler()
            self.start_new_folder_sync_engines()
            while True:
                sleep(self.refresh_frequency)
                self.start_new_folder_sync_engines()
        except ValidationError as exc:
            log.error(
                'Error authenticating; stopping sync', exc_info=True,
                account_id=self.account_id, logstash_tag='mark_invalid')
            with session_scope(self.namespace_id) as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.mark_invalid()
                account.update_sync_error(str(exc))
class ImapSyncMonitor(BaseMailSyncMonitor):
    """ Top-level controller for an account's mail sync. Spawns individual FolderSync greenlets for each folder.

    Parameters
    ----------
    poll_frequency: Integer
        Seconds to wait between polling for the greenlets spawned
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    refresh_flags_max: Integer
        the maximum number of UIDs for which we'll check flags periodically.
    """
    def __init__(self, account, heartbeat=1, poll_frequency=30,
                 retry_fail_classes=None, refresh_flags_max=2000):
        # BUGFIX: default was a shared mutable list ([]); use a None sentinel
        # so each instance gets its own list.
        if retry_fail_classes is None:
            retry_fail_classes = []
        self.poll_frequency = poll_frequency
        self.syncmanager_lock = db_write_lock(account.namespace.id)
        self.refresh_flags_max = refresh_flags_max

        provider_supports_condstore = account.provider_info.get('condstore',
                                                                False)
        account_supports_condstore = getattr(account, 'supports_condstore',
                                             False)
        if provider_supports_condstore or account_supports_condstore:
            self.sync_engine_class = CondstoreFolderSyncEngine
        else:
            self.sync_engine_class = FolderSyncEngine

        self.folder_monitors = Group()
        self.sync_status_queue = Queue()
        self.folder_monitors.start(Greenlet(self.sync_status_consumer))

        BaseMailSyncMonitor.__init__(self, account, heartbeat,
                                     retry_fail_classes)

    def prepare_sync(self):
        """Ensures that canonical tags are created for the account, and gets
        and save Folder objects for folders on the IMAP backend. Returns a
        list of tuples (folder_name, folder_id) for each folder we want to
        sync (in order)."""
        with mailsync_session_scope() as db_session:
            with _pool(self.account_id).get() as crispin_client:
                sync_folders = crispin_client.sync_folders()
                save_folder_names(log, self.account_id,
                                  crispin_client.folder_names(), db_session)
                sync_folder_names_ids = []
                for folder_name in sync_folders:
                    try:
                        id_, = db_session.query(Folder.id).\
                            filter(Folder.name == folder_name,
                                   Folder.account_id == self.account_id).one()
                        sync_folder_names_ids.append((folder_name, id_))
                    except NoResultFound:
                        log.error("Missing Folder object when starting sync",
                                  folder_name=folder_name)
                        raise MailsyncError("Missing Folder '{}' on account {}"
                                            .format(folder_name,
                                                    self.account_id))
                return sync_folder_names_ids

    def sync(self):
        """ Start per-folder syncs. Only have one per-folder sync in the
        'initial' state at a time.
        """
        sync_folder_names_ids = self.prepare_sync()
        for folder_name, folder_id in sync_folder_names_ids:
            log.info('initializing folder sync')
            thread = self.sync_engine_class(self.account_id, folder_name,
                                            folder_id, self.email_address,
                                            self.provider_name,
                                            self.poll_frequency,
                                            self.syncmanager_lock,
                                            self.refresh_flags_max,
                                            self.retry_fail_classes,
                                            self.sync_status_queue)
            thread.start()
            self.folder_monitors.add(thread)
            while not thread_polling(thread) and \
                    not thread_finished(thread) and \
                    not thread.ready():
                sleep(self.heartbeat)

            # Allow individual folder sync monitors to shut themselves down
            # after completing the initial sync.
            if thread_finished(thread) or thread.ready():
                log.info('folder sync finished/killed',
                         folder_name=thread.folder_name)
                # NOTE: Greenlet is automatically removed from the group.

        self.folder_monitors.join()

    def sync_status_consumer(self):
        """Consume per-monitor sync status queue and update the
        ImapFolderSyncStatus table accordingly. Nothing fancy is happening as
        of now but here we may implement some batching to reduce the stress
        of the database."""
        while True:
            folder_id, state = self.sync_status_queue.get()
            with mailsync_session_scope() as db_session:
                sync_status_entry = db_session.query(ImapFolderSyncStatus)\
                    .filter_by(account_id=self.account_id,
                               folder_id=folder_id)\
                    .options(load_only(ImapFolderSyncStatus.state)).one()
                sync_status_entry.state = state
                db_session.add(sync_status_entry)
                db_session.commit()
class Server(gevent.Greenlet):
    """Zerolog server greenlet: binds the collect/publish/control sockets and
    runs the manager, controller and dispatcher greenlets until killed."""

    def __init__(self, config, context=None, quiet=False):
        super(Server, self).__init__()
        self.config = config
        self.context = context or zmq.Context.instance()
        self.quiet = quiet
        # dict of the zeromq sockets we use
        self.sockets = {}

        _collect = self.context.socket(zmq.SUB)
        _collect.setsockopt_string(zmq.SUBSCRIBE, '')
        _collect.bind(zerolog.get_endpoint(
            self.config['endpoints']['collect']))
        self.sockets['collect'] = _collect

        _publish = self.context.socket(zmq.XPUB)
        _publish.hwm = 100000
        _publish.linger = 1000
        _publish.setsockopt(zmq.XPUB_VERBOSE, 1)
        _publish.bind(zerolog.get_endpoint(
            self.config['endpoints']['publish']))
        self.sockets['publish'] = _publish

        _control = self.context.socket(zmq.ROUTER)
        _control.linger = 0
        _control.bind(zerolog.get_endpoint(
            self.config['endpoints']['control']))
        self.sockets['control'] = _control

        self.manager = ConfigManager(self.sockets['publish'], self.config)
        self.controller = Controller(self.sockets['control'], self.manager)
        self.dispatcher = Dispatcher(self.sockets['collect'],
                                     self.sockets['publish'],
                                     quiet=self.quiet)
        self.greenlets = Group()
        self.log = logging.getLogger('zerolog')
        self._keep_going = True

    def _run(self):
        """Start the worker greenlets and block until they all exit."""
        self.greenlets.start(self.manager)
        self.greenlets.start(self.controller)
        self.greenlets.start(self.dispatcher)
        #self.greenlets.add(gevent.spawn(self.__client_emulator))
        self.greenlets.join()

    def kill(self, exception=gevent.GreenletExit, **kwargs):
        """Stop all worker greenlets, close the sockets, then kill self."""
        self._keep_going = False
        self.greenlets.kill()
        for _socket in self.sockets.values():
            _socket.close()
        super(Server, self).kill(exception=exception, **kwargs)

    def __client_emulator(self):
        """Emulate a tool/sysadmin changing log levels. """
        levels = 'critical error warning info debug'.split()
        import random
        while self._keep_going:
            loggers = list(self.manager.subscribed_loggers)
            self.log.info('subscribed loggers: {0}'.format(loggers))
            if loggers:
                # `loggers` is already a list; re-wrapping it in list() for
                # random.choice was redundant.
                logger_name = random.choice(loggers)
                self.manager.update(logger_name, {
                    'level': random.choice(levels),
                    'propagate': random.choice([0, 1]),
                })
                self.manager.configure(logger_name)
            gevent.sleep(5)
class ImapSyncMonitor(BaseMailSyncMonitor):
    """ Top-level controller for an account's mail sync. Spawns individual FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    poll_frequency: Integer
        Seconds to wait between polling for the greenlets spawned
    refresh_flags_max: Integer
        the maximum number of UIDs for which we'll check flags periodically.
    """
    def __init__(self, account, heartbeat=1, refresh_frequency=30,
                 poll_frequency=30, retry_fail_classes=None,
                 refresh_flags_max=2000):
        # BUGFIX: default was a shared mutable list ([]); use a None sentinel
        # so each instance gets its own list.
        if retry_fail_classes is None:
            retry_fail_classes = []
        self.refresh_frequency = refresh_frequency
        self.poll_frequency = poll_frequency
        self.syncmanager_lock = BoundedSemaphore(1)
        self.refresh_flags_max = refresh_flags_max

        provider_supports_condstore = account.provider_info.get('condstore',
                                                                False)
        account_supports_condstore = getattr(account, 'supports_condstore',
                                             False)
        if provider_supports_condstore or account_supports_condstore:
            self.sync_engine_class = CondstoreFolderSyncEngine
        else:
            self.sync_engine_class = FolderSyncEngine

        self.folder_monitors = Group()

        BaseMailSyncMonitor.__init__(self, account, heartbeat,
                                     retry_fail_classes)

    @retry_crispin
    def prepare_sync(self):
        """Ensures that canonical tags are created for the account, and gets
        and save Folder objects for folders on the IMAP backend. Returns a
        list of tuples (folder_name, folder_id) for each folder we want to
        sync (in order)."""
        with mailsync_session_scope() as db_session:
            with _pool(self.account_id).get() as crispin_client:
                sync_folders = crispin_client.sync_folders()
                save_folder_names(log, self.account_id,
                                  crispin_client.folder_names(), db_session)
                sync_folder_names_ids = []
                for folder_name in sync_folders:
                    try:
                        id_, = db_session.query(Folder.id).\
                            filter(Folder.name == folder_name,
                                   Folder.account_id == self.account_id).one()
                        sync_folder_names_ids.append((folder_name, id_))
                    except NoResultFound:
                        log.error("Missing Folder object when starting sync",
                                  folder_name=folder_name)
                        raise MailsyncError("Missing Folder '{}' on account {}"
                                            .format(folder_name,
                                                    self.account_id))
                return sync_folder_names_ids

    def start_new_folder_sync_engines(self, folders=None):
        """Start sync engines for folders not already tracked in *folders*;
        (folder_name, folder_id) pairs still syncing are added to *folders*.
        """
        # BUGFIX: default was a shared mutable set() that this method mutates
        # via folders.add(...); use a None sentinel instead.
        if folders is None:
            folders = set()
        new_folders = [f for f in self.prepare_sync() if f not in folders]
        for folder_name, folder_id in new_folders:
            log.info('Folder sync engine started',
                     account_id=self.account_id,
                     folder_id=folder_id,
                     folder_name=folder_name)
            thread = self.sync_engine_class(self.account_id, folder_name,
                                            folder_id, self.email_address,
                                            self.provider_name,
                                            self.poll_frequency,
                                            self.syncmanager_lock,
                                            self.refresh_flags_max,
                                            self.retry_fail_classes)
            self.folder_monitors.start(thread)
            while not thread_polling(thread) and \
                    not thread_finished(thread) and \
                    not thread.ready():
                sleep(self.heartbeat)

            # allow individual folder sync monitors to shut themselves down
            # after completing the initial sync
            if thread_finished(thread) or thread.ready():
                log.info('Folder sync engine finished/killed',
                         account_id=self.account_id,
                         folder_id=folder_id,
                         folder_name=folder_name)
                # note: thread is automatically removed from
                # self.folder_monitors
            else:
                folders.add((folder_name, folder_id))

    def start_delete_handler(self):
        """Start the greenlet that expunges deleted messages."""
        self.delete_handler = DeleteHandler(account_id=self.account_id,
                                            namespace_id=self.namespace_id,
                                            uid_accessor=lambda m: m.imapuids)
        self.delete_handler.start()

    def sync(self):
        """Start the delete handler, then periodically look for new folders
        to sync."""
        self.start_delete_handler()
        folders = set()
        self.start_new_folder_sync_engines(folders)
        while True:
            sleep(self.refresh_frequency)
            self.start_new_folder_sync_engines(folders)
class ImapSyncMonitor(BaseMailSyncMonitor):
    """ Top-level controller for an account's mail sync. Spawns individual FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads. (DEPRECATED)
    refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    syncback_frequency: Integer
        Seconds to wait between performing consecutive syncback iterations
        and checking for new folders to sync.
    """
    def __init__(self, account, heartbeat=1, refresh_frequency=30,
                 syncback_frequency=5):
        # DEPRECATED.
        # TODO[k]: Remove after sync-syncback integration deploy is complete.
        self.refresh_frequency = refresh_frequency
        self.syncmanager_lock = BoundedSemaphore(1)
        self.saved_remote_folders = None
        self.sync_engine_class = FolderSyncEngine
        self.folder_monitors = Group()
        self.delete_handler = None
        self.syncback_handler = None
        self.folder_sync_signals = {}
        self.syncback_timestamp = None
        self.syncback_frequency = syncback_frequency

        BaseMailSyncMonitor.__init__(self, account, heartbeat)

    @retry_crispin
    def prepare_sync(self):
        """
        Gets and save Folder objects for folders on the IMAP backend. Returns a
        list of folder names for the folders we want to sync (in order).
        """
        with connection_pool(self.account_id).get() as crispin_client:
            # Get a fresh list of the folder names from the remote
            remote_folders = crispin_client.folders()
            # The folders we should be syncing
            sync_folders = crispin_client.sync_folders()

            if self.saved_remote_folders != remote_folders:
                with session_scope(self.namespace_id) as db_session:
                    self.save_folder_names(db_session, remote_folders)
                    self.saved_remote_folders = remote_folders
            return sync_folders

    def save_folder_names(self, db_session, raw_folders):
        """
        Save the folders present on the remote backend for an account.

        * Create Folder objects.
        * Delete Folders that no longer exist on the remote.

        Notes
        -----
        Generic IMAP uses folders (not labels).
        Canonical folders ('inbox') and other folders are created as Folder
        objects only accordingly.

        We don't canonicalize folder names to lowercase when saving because
        different backends may be case-sensitive or otherwise - code that
        references saved folder names should canonicalize if needed when
        doing comparisons.
        """
        account = db_session.query(Account).get(self.account_id)
        remote_folder_names = {f.display_name.rstrip()[:MAX_FOLDER_NAME_LENGTH]
                               for f in raw_folders}

        assert 'inbox' in {f.role for f in raw_folders},\
            'Account {} has no detected inbox folder'.\
            format(account.email_address)

        local_folders = {f.name: f for f in db_session.query(Folder).filter(
                         Folder.account_id == self.account_id)}

        # Delete folders no longer present on the remote.
        # Note that the folder with canonical_name='inbox' cannot be deleted;
        # remote_folder_names will always contain an entry corresponding to it.
        discard = set(local_folders) - remote_folder_names
        for name in discard:
            log.info('Folder deleted from remote', account_id=self.account_id,
                     name=name)
            if local_folders[name].category_id is not None:
                cat = db_session.query(Category).get(
                    local_folders[name].category_id)
                if cat is not None:
                    db_session.delete(cat)
            del local_folders[name]

        # Create new folders
        for raw_folder in raw_folders:
            Folder.find_or_create(db_session, account,
                                  raw_folder.display_name, raw_folder.role)

        # Set the should_run bit for existing folders to True (it's True by
        # default for new ones.)
        for f in local_folders.values():
            if f.imapsyncstatus:
                f.imapsyncstatus.sync_should_run = True

        db_session.commit()

    def start_new_folder_sync_engines(self):
        """Start sync engines (and, when enabled, S3 monitors) for folders
        that should sync; wait for each engine to reach the 'poll' state,
        performing syncback while waiting."""
        running_monitors = {monitor.folder_name: monitor for monitor in
                            self.folder_monitors}
        with session_scope(self.namespace_id) as db_session:
            account = db_session.query(Account).options(
                load_only('_sync_status')).get(self.account_id)
            s3_resync = account._sync_status.get('s3_resync', False)

        for folder_name in self.prepare_sync():
            if folder_name in running_monitors:
                thread = running_monitors[folder_name]
            else:
                log.info('Folder sync engine started',
                         account_id=self.account_id,
                         folder_name=folder_name)
                self._add_sync_signal(folder_name)
                thread = self.sync_engine_class(
                    self.account_id, self.namespace_id, folder_name,
                    self.email_address, self.provider_name,
                    self.syncmanager_lock,
                    self.folder_sync_signals[folder_name])
                self.folder_monitors.start(thread)
                if s3_resync:
                    log.info('Starting an S3 monitor',
                             account_id=self.account_id)
                    s3_thread = S3FolderSyncEngine(self.account_id,
                                                   self.namespace_id,
                                                   folder_name,
                                                   self.email_address,
                                                   self.provider_name,
                                                   self.syncmanager_lock,
                                                   None)
                    self.folder_monitors.start(s3_thread)

            while not thread.state == 'poll' and not thread.ready():
                sleep(self.heartbeat)
                self.perform_syncback()

            if thread.ready():
                # BUGFIX: was `self._remove_sync_signal[folder_name]`, which
                # subscripts a bound method and raises TypeError; the signal
                # was never removed. It must be a call.
                self._remove_sync_signal(folder_name)
                log.info('Folder sync engine exited',
                         account_id=self.account_id,
                         folder_name=folder_name,
                         error=thread.exception)

    def start_delete_handler(self):
        """Start the delete handler exactly once."""
        if self.delete_handler is None:
            self.delete_handler = DeleteHandler(
                account_id=self.account_id,
                namespace_id=self.namespace_id,
                provider_name=self.provider_name,
                uid_accessor=lambda m: m.imapuids)
            self.delete_handler.start()

    def perform_syncback(self):
        """ Perform syncback for the account.

        Syncback is performed iff all folder syncs are paused, and the
        previous syncback occurred more than syncback_frequency seconds ago.
        The first condition is checked by the call to _can_syncback().
        The second condition is needed because if there are a large number of
        pending actions during initial sync, it could repeatedly get
        interrupted and put on hold for seconds at a time.
        """
        from inbox.syncback.base import SyncbackHandler
        if not self._can_syncback():
            log.info('Skipping syncback', reason='folder syncs running')
            return
        if (self.syncback_timestamp and
                (datetime.utcnow() - self.syncback_timestamp).seconds <
                self.syncback_frequency):
            log.info('Skipping syncback',
                     reason='last syncback < syncback_frequency seconds ago',
                     syncback_frequency=self.syncback_frequency)
            # Reset here so syncs can proceed
            self._signal_syncs()
            return
        if self.syncback_handler is None:
            self.syncback_handler = SyncbackHandler(self.account_id,
                                                    self.namespace_id,
                                                    self.provider_name)
        try:
            interval = ((datetime.utcnow() - self.syncback_timestamp).seconds
                        if self.syncback_timestamp else None)
            log.info('Performing syncback',
                     syncback_interval_in_seconds=interval)
            self.syncback_handler.send_client_changes()
            self.syncback_timestamp = datetime.utcnow()
        except Exception:
            # Log, set self.folder_sync_signals and then re-raise (so the
            # greenlet can be restarted etc.)
            log.error('Critical syncback error', exc_info=True)
            raise
        finally:
            # Reset here so syncs can proceed
            self._signal_syncs()

    def sync(self):
        """Run the sync + syncback loop; on auth failure mark the account
        invalid and stop."""
        try:
            self.start_delete_handler()
            self.start_new_folder_sync_engines()
            while True:
                sleep(self.syncback_frequency)
                self.perform_syncback()
                self.start_new_folder_sync_engines()
        except ValidationError as exc:
            log.error(
                'Error authenticating; stopping sync', exc_info=True,
                account_id=self.account_id, logstash_tag='mark_invalid')
            with session_scope(self.namespace_id) as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.mark_invalid()
                account.update_sync_error(str(exc))

    def _add_sync_signal(self, folder_name):
        """Create (and set) the pause/resume signal for *folder_name*."""
        self.folder_sync_signals[folder_name] = Event()
        self.folder_sync_signals[folder_name].set()

    def _remove_sync_signal(self, folder_name):
        """Drop the pause/resume signal for *folder_name*."""
        del self.folder_sync_signals[folder_name]

    def _can_syncback(self):
        """ Determine if syncback can occur.

        If all folder syncs are paused as indicated by the
        folder_sync_signals, returns True. Else, returns False.
        """
        return (not self.folder_sync_signals or
                all(not signal.is_set() for signal in
                    self.folder_sync_signals.values()))

    def _signal_syncs(self):
        """ Indicate that folder syncs can resume. """
        for signal in self.folder_sync_signals.values():
            signal.set()