class FolderSyncEngine(Greenlet):
    """Base class for a per-folder IMAP sync engine."""

    def __init__(self, account_id, folder_name, folder_id, email_address,
                 provider_name, poll_frequency, syncmanager_lock,
                 refresh_flags_max, retry_fail_classes):
        bind_context(self, 'foldersyncengine', account_id, folder_id)
        self.account_id = account_id
        self.folder_name = folder_name
        self.folder_id = folder_id
        self.poll_frequency = poll_frequency
        self.syncmanager_lock = syncmanager_lock
        self.refresh_flags_max = refresh_flags_max
        self.retry_fail_classes = retry_fail_classes
        self.state = None
        self.provider_name = provider_name

        with mailsync_session_scope() as db_session:
            account = db_session.query(Account).get(self.account_id)
            self.throttled = account.throttled
            self.namespace_id = account.namespace.id
            assert self.namespace_id is not None, "namespace_id is None"

        self.state_handlers = {
            'initial': self.initial_sync,
            'initial uidinvalid': self.resync_uids,
            'poll': self.poll,
            'poll uidinvalid': self.resync_uids,
            # Called with no arguments by _run_impl, so this must be a
            # zero-argument callable.
            'finish': lambda: 'finish',
        }

        Greenlet.__init__(self)

        self.heartbeat_status = HeartbeatStatusProxy(self.account_id,
                                                     self.folder_id)
        self.heartbeat_status.publish(provider_name=self.provider_name,
                                      folder_name=self.folder_name)

    def _run(self):
        # Bind greenlet-local logging context.
        log.new(account_id=self.account_id, folder=self.folder_name)
        return retry_and_report_killed(self._run_impl,
                                       account_id=self.account_id,
                                       folder_name=self.folder_name,
                                       logger=log,
                                       fail_classes=self.retry_fail_classes)

    def _run_impl(self):
        # We defer initializing the pool to here so that we'll retry if there
        # are any errors (remote server 503s or similar) when initializing it.
        self.conn_pool = _pool(self.account_id)
        # We do NOT ignore soft deletes in the mail sync because it gets real
        # complicated handling e.g. when backends reuse imapids. ImapUid
        # objects are the only objects deleted by the mail sync backends
        # anyway.
        saved_folder_status = self._load_state()
        # Eagerly signal the sync status.
        self.heartbeat_status.publish(state=self.state)
        # NOTE: The parent ImapSyncMonitor handler could kill us at any
        # time if it receives a shutdown command. The shutdown command is
        # equivalent to ctrl-c.
        while True:
            old_state = self.state
            try:
                self.state = self.state_handlers[old_state]()
                self.heartbeat_status.publish(state=self.state)
            except UidInvalid:
                self.state = self.state + ' uidinvalid'
            # State handlers are idempotent, so it's okay if we're
            # killed between the end of the handler and the commit.
            if self.state != old_state:
                # Don't need to re-query, will auto refresh on re-associate.
                with mailsync_session_scope() as db_session:
                    db_session.add(saved_folder_status)
                    saved_folder_status.state = self.state
                    db_session.commit()
            if self.state == 'finish':
                return

    def _load_state(self):
        with mailsync_session_scope() as db_session:
            try:
                state = ImapFolderSyncStatus.state
                saved_folder_status = db_session.query(ImapFolderSyncStatus)\
                    .filter_by(account_id=self.account_id,
                               folder_id=self.folder_id).options(
                        load_only(state)).one()
            except NoResultFound:
                saved_folder_status = ImapFolderSyncStatus(
                    account_id=self.account_id, folder_id=self.folder_id)
                db_session.add(saved_folder_status)
            saved_folder_status.start_sync()
            db_session.commit()
            self.state = saved_folder_status.state
            return saved_folder_status

    @retry_crispin
    def initial_sync(self):
        log.bind(state='initial')
        log.info('starting initial sync')
        with self.conn_pool.get() as crispin_client:
            crispin_client.select_folder(self.folder_name, uidvalidity_cb)
            self.initial_sync_impl(crispin_client)
        return 'poll'

    @retry_crispin
    def poll(self):
        log.bind(state='poll')
        log.info('polling')
        self.poll_impl()
        return 'poll'

    @retry_crispin
    def resync_uids(self):
        log.bind(state=self.state)
        log.info('UIDVALIDITY changed')
        self.resync_uids_impl()
        return 'initial'

    def initial_sync_impl(self, crispin_client):
        # We wrap the block in a try/finally because the change_poller
        # greenlet needs to be killed when this greenlet is interrupted.
        change_poller = None
        try:
            assert crispin_client.selected_folder_name == self.folder_name
            remote_uids = crispin_client.all_uids()
            with self.syncmanager_lock:
                with mailsync_session_scope() as db_session:
                    local_uids = common.all_uids(self.account_id, db_session,
                                                 self.folder_name)
                    self.remove_deleted_uids(db_session, local_uids,
                                             remote_uids)

            new_uids = set(remote_uids) - local_uids
            download_stack = UIDStack()
            for uid in sorted(new_uids):
                download_stack.put(uid, GenericUIDMetadata(self.throttled))

            with mailsync_session_scope() as db_session:
                self.update_uid_counts(
                    db_session,
                    remote_uid_count=len(remote_uids),
                    # This is the initial size of our download_queue.
                    download_uid_count=len(new_uids))

            change_poller = spawn(self.poll_for_changes, download_stack)
            bind_context(change_poller, 'changepoller', self.account_id,
                         self.folder_id)
            self.download_uids(crispin_client, download_stack)
        finally:
            if change_poller is not None:
                change_poller.kill()

    def poll_impl(self):
        with self.conn_pool.get() as crispin_client:
            crispin_client.select_folder(self.folder_name, uidvalidity_cb)
            download_stack = UIDStack()
            self.check_uid_changes(crispin_client, download_stack,
                                   async_download=False)
        sleep(self.poll_frequency)

    def resync_uids_impl(self):
        # First check if the changed UIDVALIDITY we got from the remote was
        # spurious.
        with mailsync_session_scope() as db_session:
            imap_folder_info_entry = db_session.query(ImapFolderInfo). \
                filter(ImapFolderInfo.account_id == self.account_id,
                       ImapFolderInfo.folder_id == self.folder_id).one()
            saved_uidvalidity = imap_folder_info_entry.uidvalidity
        with self.conn_pool.get() as crispin_client:
            crispin_client.select_folder(self.folder_name, lambda *args: True)
            if crispin_client.selected_uidvalidity <= saved_uidvalidity:
                log.debug('UIDVALIDITY unchanged')
                return
        # TODO: Implement actual UID resync.
        raise NotImplementedError

    @retry_crispin
    def poll_for_changes(self, download_stack):
        while True:
            with self.conn_pool.get() as crispin_client:
                crispin_client.select_folder(self.folder_name, uidvalidity_cb)
                self.check_uid_changes(crispin_client, download_stack,
                                       async_download=True)
            sleep(self.poll_frequency)

    def download_uids(self, crispin_client, download_stack):
        while not download_stack.empty():
            # Defer removing the UID from the queue until after it's
            # committed to the DB, to avoid races with poll_for_changes().
            uid, metadata = download_stack.peek()
            self.download_and_commit_uids(crispin_client, self.folder_name,
                                          [uid])
            download_stack.get()
            report_progress(self.account_id, self.folder_name, 1,
                            download_stack.qsize())
            self.heartbeat_status.publish()
            if self.throttled and metadata is not None and metadata.throttled:
                # Check to see if the account's throttled state has been
                # modified. If so, immediately accelerate.
                with mailsync_session_scope() as db_session:
                    acc = db_session.query(Account).get(self.account_id)
                    self.throttled = acc.throttled
                if self.throttled:
                    log.debug('throttled; sleeping')
                    sleep(THROTTLE_WAIT)

    def create_message(self, db_session, acct, folder, msg):
        assert acct is not None and acct.namespace is not None
        # Check if we somehow already saved the imapuid (shouldn't happen, but
        # possible due to race condition). If so, don't commit changes.
        existing_imapuid = db_session.query(ImapUid).filter(
            ImapUid.account_id == acct.id, ImapUid.folder_id == folder.id,
            ImapUid.msg_uid == msg.uid).first()
        if existing_imapuid is not None:
            log.error('Expected to create imapuid, but existing row found',
                      remote_msg_uid=msg.uid,
                      existing_imapuid=existing_imapuid.id)
            return None
        new_uid = common.create_imap_message(db_session, log, acct, folder,
                                             msg)
        new_uid = self.add_message_attrs(db_session, new_uid, msg)
        return new_uid

    def fetch_similar_threads(self, db_session, new_uid):
        # FIXME: restrict this to messages in the same folder?
        clean_subject = cleanup_subject(new_uid.message.subject)
        # Return similar threads ordered by descending id, so that we append
        # to the most recently created similar thread.
        return db_session.query(ImapThread).filter(
            ImapThread.namespace_id == self.namespace_id,
            ImapThread.subject.like(clean_subject)).order_by(
            desc(ImapThread.id)).all()

    def add_message_attrs(self, db_session, new_uid, msg):
        """Post-create-message bits."""
        with db_session.no_autoflush:
            parent_threads = self.fetch_similar_threads(db_session, new_uid)
            construct_new_thread = True

            if parent_threads:
                # If there's a parent thread that isn't too long already,
                # add to it. Otherwise create a new thread.
                parent_thread = parent_threads[0]
                parent_message_count, = db_session.query(
                    func.count(Message.id)).filter(
                    Message.thread_id == parent_thread.id).one()
                if parent_message_count < MAX_THREAD_LENGTH:
                    construct_new_thread = False

            if construct_new_thread:
                new_uid.message.thread = ImapThread.from_imap_message(
                    db_session, new_uid.account.namespace, new_uid.message)
                new_uid.message.thread_order = 0
            else:
                parent_thread = parent_threads[0]
                parent_thread.messages.append(new_uid.message)
                constructed_thread = thread_messages(parent_thread.messages)
                for index, message in enumerate(constructed_thread):
                    message.thread_order = index

        db_session.flush()
        # Make sure this thread has all the correct labels.
        common.add_any_new_thread_labels(new_uid.message.thread, new_uid,
                                         db_session)
        new_uid.update_flags_and_labels(msg.flags)
        return new_uid

    def remove_deleted_uids(self, db_session, local_uids, remote_uids):
        """
        Remove imapuid entries that no longer exist on the remote.

        Works as follows:
            1. Do a LIST on the current folder to see what messages are on
               the server.
            2. Compare to message uids stored locally.
            3. Purge uids we have locally but not on the server. Ignore
               remote uids that aren't saved locally.

        Make SURE to be holding `syncmanager_lock` when calling this
        function; we do not grab it here to allow callers to lock higher
        level functionality.

        """
        to_delete = set(local_uids) - set(remote_uids)
        common.remove_deleted_uids(self.account_id, db_session, to_delete,
                                   self.folder_id)

    def download_and_commit_uids(self, crispin_client, folder_name, uids):
        # Note that folder_name here might *NOT* be equal to self.folder_name,
        # because, for example, we download messages via the 'All Mail' folder
        # in Gmail.
        raw_messages = safe_download(crispin_client, uids)
        if not raw_messages:
            return 0
        with self.syncmanager_lock:
            with mailsync_session_scope() as db_session:
                new_imapuids = create_db_objects(
                    self.account_id, db_session, log, folder_name,
                    raw_messages, self.create_message)
                commit_uids(db_session, new_imapuids, self.provider_name)
        return len(new_imapuids)

    def update_metadata(self, crispin_client, updated):
        """Update flags (the only metadata that can change)."""
        # Bigger chunk because the data being fetched here is very small.
        for uids in chunk(updated, 5 * crispin_client.CHUNK_SIZE):
            new_flags = crispin_client.flags(uids)
            # Messages can disappear in the meantime; we'll update them next
            # sync.
            uids = [uid for uid in uids if uid in new_flags]
            with self.syncmanager_lock:
                with mailsync_session_scope() as db_session:
                    common.update_metadata(self.account_id, db_session,
                                           self.folder_name, self.folder_id,
                                           uids, new_flags)
                    db_session.commit()

    def update_uid_counts(self, db_session, **kwargs):
        saved_status = db_session.query(ImapFolderSyncStatus).join(Folder). \
            filter(ImapFolderSyncStatus.account_id == self.account_id,
                   Folder.name == self.folder_name).one()
        # We're not updating the current_remote_count metric,
        # so don't update uid_checked_timestamp.
        if kwargs.get('remote_uid_count') is None:
            saved_status.update_metrics(kwargs)
        else:
            metrics = dict(uid_checked_timestamp=datetime.utcnow())
            metrics.update(kwargs)
            saved_status.update_metrics(metrics)

    def check_uid_changes(self, crispin_client, download_stack,
                          async_download):
        remote_uids = set(crispin_client.all_uids())
        with self.syncmanager_lock:
            with mailsync_session_scope() as db_session:
                local_uids = common.all_uids(self.account_id, db_session,
                                             self.folder_name)
                # Download new UIDs.
                stack_uids = {uid for uid, _ in download_stack}
                local_with_pending_uids = local_uids | stack_uids
                # Filter out messages that have disappeared on the remote
                # side.
                download_stack.discard([
                    item for item in download_stack
                    if item[0] not in remote_uids
                ])
                for uid in sorted(remote_uids):
                    if uid not in local_with_pending_uids:
                        download_stack.put(uid, None)
                self.remove_deleted_uids(db_session, local_uids, remote_uids)
        if not async_download:
            self.download_uids(crispin_client, download_stack)
            with mailsync_session_scope() as db_session:
                self.update_uid_counts(
                    db_session,
                    remote_uid_count=len(remote_uids),
                    download_uid_count=download_stack.qsize())
        to_refresh = sorted(remote_uids & local_uids)[-self.refresh_flags_max:]
        self.update_metadata(crispin_client, to_refresh)
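
# Illustrative sketch only: UIDStack is referenced above but not defined in
# this excerpt. The minimal interface below is inferred from the call sites
# in download_uids() and check_uid_changes() (put/peek/get/empty/qsize/
# discard and iteration over (uid, metadata) pairs); the ordering policy and
# everything else here is an assumption, not the project's implementation.
class UIDStackSketch(object):
    """Hypothetical stand-in for UIDStack, for illustration only."""

    def __init__(self):
        # uid -> metadata; the real class presumably also has to be safe
        # against concurrent use by the change-poller greenlet.
        self._items = {}

    def put(self, uid, metadata):
        self._items[uid] = metadata

    def peek(self):
        # Return the highest pending uid and its metadata without removing
        # it (newest-first ordering is a guess).
        uid = max(self._items)
        return uid, self._items[uid]

    def get(self):
        uid, metadata = self.peek()
        del self._items[uid]
        return uid, metadata

    def empty(self):
        return not self._items

    def qsize(self):
        return len(self._items)

    def discard(self, items):
        # `items` is an iterable of (uid, metadata) pairs, matching the
        # check_uid_changes() call site above.
        for uid, _ in items:
            self._items.pop(uid, None)

    def __iter__(self):
        # Iteration yields (uid, metadata) pairs, as assumed by
        # `{uid for uid, _ in download_stack}` above.
        return iter(sorted(self._items.items()))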
class BaseSync(gevent.Greenlet):
    def __init__(self, account_id, namespace_id, poll_frequency, folder_id,
                 folder_name, provider_name):
        self.shutdown = gevent.event.Event()
        self.account_id = account_id
        self.namespace_id = namespace_id
        self.poll_frequency = poll_frequency
        self.log = logger.new(account_id=account_id)
        self.folder_id = folder_id
        self.folder_name = folder_name
        self._provider_name = provider_name

        self.heartbeat_status = HeartbeatStatusProxy(self.account_id,
                                                     self.folder_id)
        self.heartbeat_status.publish(provider_name=self._provider_name,
                                      folder_name=self.folder_name)

        gevent.Greenlet.__init__(self)

    def _run(self):
        return retry_with_logging(self._run_impl, self.log,
                                  account_id=self.account_id)

    def _run_impl(self):
        try:
            self.provider_instance = self.provider(self.account_id,
                                                   self.namespace_id)
            while True:
                # Check to see if this greenlet should exit.
                if self.shutdown.is_set():
                    clear_heartbeat_status(self.account_id, self.folder_id)
                    return False

                try:
                    self.poll()
                    self.heartbeat_status.publish(state='poll')
                # If we get a connection or API permissions error, then sleep
                # 2x poll frequency.
                except ConnectionError:
                    self.log.error('Error while polling', exc_info=True)
                    self.heartbeat_status.publish(state='poll error')
                    gevent.sleep(self.poll_frequency)
                gevent.sleep(self.poll_frequency)
        except ValidationError:
            # Bad account credentials; exit.
            self.log.error('Error while establishing the connection',
                           exc_info=True)
            return False

    @property
    def target_obj(self):
        raise NotImplementedError  # return Contact or Event

    @property
    def provider(self):
        raise NotImplementedError  # Implement in subclasses

    @property
    def provider_name(self):
        raise NotImplementedError  # Implement in subclasses

    def last_sync(self, account):
        raise NotImplementedError  # Implement in subclasses

    def poll(self):
        return base_poll(self.account_id, self.provider_instance,
                         self.last_sync, self.target_obj, self.set_last_sync)
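
# Illustrative sketch only: one way a concrete class might fill in the hooks
# BaseSync leaves abstract. ContactSyncSketch, _ProviderSketch, and the
# last_synced_contacts attribute are hypothetical names; set_last_sync's
# signature is an assumption, since base_poll() is not shown in this excerpt.
class _ProviderSketch(object):
    """Hypothetical remote-API wrapper, instantiated in BaseSync._run_impl."""

    def __init__(self, account_id, namespace_id):
        self.account_id = account_id
        self.namespace_id = namespace_id


class ContactSyncSketch(BaseSync):
    @property
    def provider(self):
        # _run_impl calls self.provider(account_id, namespace_id), so this
        # property returns a class, not an instance.
        return _ProviderSketch

    @property
    def target_obj(self):
        # Per the base class comment, this should be the Contact or Event
        # model (its import is not shown in this excerpt).
        return Contact

    @property
    def provider_name(self):
        return 'contacts-sketch'

    def last_sync(self, account):
        # Hypothetical timestamp attribute on the account model.
        return account.last_synced_contacts

    def set_last_sync(self, account, dt):
        account.last_synced_contacts = dt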