def highestmodseq_callback(self, crispin_client, new_uids, updated_uids,
                           download_stack, async_download):
    """Queue changed UIDs on ``download_stack`` and optionally fetch them.

    The new and updated UIDs are concatenated, their Gmail metadata is
    fetched, and the result is passed through the message-deduplication
    helper. What is queued depends on which folder list
    ``self.folder_name`` appears in:

    * thread-expand folders: each remaining UID that has both flags and
      metadata is pushed as a ``GMessage``;
    * UID-download folders: each remaining UID is pushed with ``None``.

    If ``async_download`` is false, the queued work is downloaded before
    returning; otherwise the stack is only filled.

    Raises:
        MailsyncError: if the folder is in neither folder list.
    """
    log.debug('running highestmodseq callback')
    changed_uids = new_uids + updated_uids
    metadata_by_uid = crispin_client.g_metadata(changed_uids)
    pending_uids = self.__deduplicate_message_download(
        crispin_client, metadata_by_uid, changed_uids)

    if self.folder_name in thread_expand_folders(crispin_client):
        flags_by_uid = crispin_client.flags(pending_uids)
        for uid in sorted(pending_uids):
            # IMAP will just return no data for a UID if it's
            # disappeared from the folder in the meantime.
            if uid not in flags_by_uid or uid not in metadata_by_uid:
                continue
            uid_flags = flags_by_uid[uid]
            download_stack.put(
                uid,
                GMessage(uid, metadata_by_uid[uid], uid_flags.flags,
                         uid_flags.labels, False))

        if async_download:
            return

        # Need to select All Mail before doing thread expansion
        if not self.is_all_mail(crispin_client):
            all_mail_name = crispin_client.folder_names()['all']
            crispin_client.select_folder(all_mail_name, uidvalidity_cb)
        self.__download_queued_threads(crispin_client, download_stack)
        # Switch back to this engine's own folder afterwards.
        if not self.is_all_mail(crispin_client):
            crispin_client.select_folder(self.folder_name, uidvalidity_cb)
        return

    if self.folder_name in uid_download_folders(crispin_client):
        for uid in sorted(pending_uids):
            download_stack.put(uid, None)
        if not async_download:
            self.download_uids(crispin_client, download_stack)
        return

    raise MailsyncError('Unknown Gmail sync folder: {}'.format(
        self.folder_name))
def prepare_sync(self):
    """Refresh saved folder names and resolve the folders to sync.

    The remote folder list is read from the IMAP backend and saved through
    ``self.save_folder_names`` only when it differs from
    ``self.saved_remote_folders``. Each folder reported by
    ``crispin_client.sync_folders()`` is then looked up in the database.

    Returns:
        A list of ``(folder_name, folder_id)`` tuples, in the order given
        by ``crispin_client.sync_folders()``.

    Raises:
        MailsyncError: if a folder to sync has no ``Folder`` row for this
            account.
    """
    with session_scope(self.namespace_id) as db_session:
        with connection_pool(self.account_id).get() as crispin_client:
            # Get a fresh list of the folder names from the remote
            current_remote = crispin_client.folders()
            if self.saved_remote_folders != current_remote:
                self.save_folder_names(db_session, current_remote)
                self.saved_remote_folders = current_remote
            # The folders we should be syncing
            folders_to_sync = crispin_client.sync_folders()

        names_and_ids = []
        for folder_name in folders_to_sync:
            id_query = db_session.query(Folder.id).filter(
                Folder.name == folder_name,
                Folder.account_id == self.account_id)
            try:
                (folder_id,) = id_query.one()
            except NoResultFound:
                log.error('Missing Folder object when starting sync',
                          folder_name=folder_name)
                raise MailsyncError(
                    u"Missing Folder '{}' on account {}".format(
                        folder_name, self.account_id))
            names_and_ids.append((folder_name, folder_id))
        return names_and_ids
def prepare_sync(self):
    """Save the remote folder names and resolve the folders to sync.

    The folder names reported by the IMAP backend are passed to
    ``save_folder_names``; each folder returned by
    ``crispin_client.sync_folders()`` is then looked up in the database.

    Returns:
        A list of ``(folder_name, folder_id)`` tuples for each folder we
        want to sync (in order).

    Raises:
        MailsyncError: if a folder to sync has no ``Folder`` row for this
            account.
    """
    with mailsync_session_scope() as db_session:
        with _pool(self.account_id).get() as crispin_client:
            sync_folders = crispin_client.sync_folders()
            save_folder_names(log, self.account_id,
                              crispin_client.folder_names(), db_session)

        sync_folder_names_ids = []
        for folder_name in sync_folders:
            try:
                id_, = db_session.query(Folder.id). \
                    filter(Folder.name == folder_name,
                           Folder.account_id == self.account_id).one()
            except NoResultFound:
                log.error("Missing Folder object when starting sync",
                          folder_name=folder_name)
                # Unicode template: with a byte-string template, a
                # non-ASCII folder name makes .format() itself fail on
                # Python 2 and hides the MailsyncError.
                raise MailsyncError(
                    u"Missing Folder '{}' on account {}".format(
                        folder_name, self.account_id))
            sync_folder_names_ids.append((folder_name, id_))
        return sync_folder_names_ids
def gmail_highestmodseq_update(crispin_client, log, folder_name, new_uids,
                               updated_uids, syncmanager_lock):
    """Download the messages behind a set of new and updated UIDs.

    The two UID lists are concatenated, their Gmail metadata is fetched and
    the result is passed through ``deduplicate_message_download``. For the
    inbox folder the remaining UIDs are queued as ``GMessage`` objects and
    handed to ``download_queued_threads``; for UID-download folders they
    are handed to ``download_queued_uids``.

    Raises:
        MailsyncError: if ``folder_name`` is neither the inbox nor one of
            the UID-download folders.
    """
    changed_uids = new_uids + updated_uids
    metadata_by_uid = crispin_client.g_metadata(changed_uids)
    pending_uids = deduplicate_message_download(
        crispin_client, log, syncmanager_lock, metadata_by_uid, changed_uids)

    if folder_name == crispin_client.folder_names()['inbox']:
        flags_by_uid = crispin_client.flags(pending_uids)
        thread_stack = LifoQueue()
        for uid in pending_uids:
            # IMAP will just return no data for a UID if it's disappeared
            # from the folder in the meantime.
            if uid not in flags_by_uid or uid not in metadata_by_uid:
                continue
            uid_flags = flags_by_uid[uid]
            thread_stack.put(GMessage(
                uid, metadata_by_uid[uid], uid_flags.flags,
                uid_flags.labels))
        download_queued_threads(crispin_client, log, folder_name,
                                thread_stack, syncmanager_lock)
        return

    if folder_name in uid_download_folders(crispin_client):
        uid_stack = uid_list_to_stack(pending_uids)
        queued_count = uid_stack.qsize()
        download_queued_uids(crispin_client, log, folder_name, uid_stack,
                             0, queued_count, syncmanager_lock,
                             gmail_download_and_commit_uids,
                             create_gmail_message)
        return

    raise MailsyncError(
        'Unknown Gmail sync folder: {}'.format(folder_name))
def sync(self):
    """Start per-folder syncs.

    Only have one per-folder sync in the 'initial' state at a time: after
    starting a folder monitor, this method sleeps in ``self.heartbeat``
    steps until ``self._thread_polling(thread)``,
    ``self._thread_finished(thread)`` or ``thread.ready()`` is true, and
    only then moves on to the next folder. Folders whose saved state is
    already ``'finish'`` are not started again. Finally it joins
    ``self.folder_monitors``.

    Raises:
        MailsyncError: if a folder to sync has no ``Folder`` row for this
            account.
    """
    with mailsync_session_scope() as db_session:
        with _pool(self.account_id).get() as crispin_client:
            sync_folders = crispin_client.sync_folders()
            account = db_session.query(ImapAccount)\
                .get(self.account_id)
            save_folder_names(log, account,
                              crispin_client.folder_names(), db_session)
        Tag.create_canonical_tags(account.namespace, db_session)

        # Folder name -> Folder.id for every folder of this account.
        folder_id_for = {
            name: id_ for id_, name in
            db_session.query(Folder.id, Folder.name).
            filter_by(account_id=self.account_id)
        }

        # Folder name -> persisted sync state for this account.
        saved_states = {
            name: state for name, state in
            db_session.query(
                Folder.name, ImapFolderSyncStatus.state).join(
                ImapFolderSyncStatus.folder).filter(
                ImapFolderSyncStatus.account_id == self.account_id)
        }

    for folder_name in sync_folders:
        if folder_name not in folder_id_for:
            log.error("Missing Folder object when starting sync",
                      folder_name=folder_name,
                      folder_id_for=folder_id_for)
            # Unicode template: with a byte-string template, a non-ASCII
            # folder name makes .format() itself fail on Python 2 and
            # hides the MailsyncError.
            raise MailsyncError(
                u"Missing Folder '{}' on account {}".format(
                    folder_name, self.account_id))

        if saved_states.get(folder_name) != 'finish':
            log.info('initializing folder sync')
            # STOPSHIP(emfree): replace by appropriate base class.
            thread = self.sync_engine_class(
                self.account_id, folder_name, folder_id_for[folder_name],
                self.email_address, self.provider_name,
                self.poll_frequency, self.syncmanager_lock,
                self.refresh_flags_max, self.retry_fail_classes)
            thread.start()
            self.folder_monitors.add(thread)
            while not self._thread_polling(thread) and \
                    not self._thread_finished(thread) and \
                    not thread.ready():
                sleep(self.heartbeat)

            # Allow individual folder sync monitors to shut themselves down
            # after completing the initial sync.
            if self._thread_finished(thread) or thread.ready():
                log.info('folder sync finished/killed',
                         folder_name=thread.folder_name)
                # NOTE: Greenlet is automatically removed from the group.

    self.folder_monitors.join()
def sync(self):
    """Start per-folder syncs.

    Only have one per-folder sync in the 'initial' state at a time: after
    starting a folder monitor, this method sleeps in ``self.heartbeat``
    steps until ``self._thread_polling(thread)`` or
    ``self._thread_finished(thread)`` is true, and only then moves on to
    the next folder. Folders whose saved state is already ``'finish'`` are
    not started again. Finally it joins ``self.folder_monitors``.

    Raises:
        MailsyncError: if a folder to sync has no ``Folder`` row for this
            account.
    """
    with session_scope(ignore_soft_deletes=False) as db_session:
        with _pool(self.account_id).get() as crispin_client:
            sync_folders = crispin_client.sync_folders()
            account = db_session.query(ImapAccount)\
                .get(self.account_id)
            save_folder_names(self.log, account,
                              crispin_client.folder_names(), db_session)
        Tag.create_canonical_tags(account.namespace, db_session)

        # Folder name -> Folder.id for every folder of this account.
        folder_id_for = {
            name: id_ for id_, name in
            db_session.query(Folder.id, Folder.name).
            filter_by(account_id=self.account_id)
        }

        # Folder name -> persisted sync state for this account.
        saved_states = {
            name: state for name, state in
            db_session.query(
                Folder.name, ImapFolderSyncStatus.state).join(
                ImapFolderSyncStatus.folder).filter(
                ImapFolderSyncStatus.account_id == self.account_id)
        }

    for folder_name in sync_folders:
        if folder_name not in folder_id_for:
            self.log.error("Missing Folder object when starting sync",
                           folder_name=folder_name,
                           folder_id_for=folder_id_for)
            # Unicode template: with a byte-string template, a non-ASCII
            # folder name makes .format() itself fail on Python 2 and
            # hides the MailsyncError.
            raise MailsyncError(
                u"Missing Folder '{}' on account {}".format(
                    folder_name, self.account_id))

        if saved_states.get(folder_name) != 'finish':
            self.log.info('initializing folder sync')
            thread = ImapFolderSyncMonitor(
                self.account_id, folder_name, folder_id_for[folder_name],
                self.email_address, self.provider_name, self.shared_state,
                self.folder_state_handlers, self.retry_fail_classes)
            thread.start()
            self.folder_monitors.add(thread)
            while not self._thread_polling(thread) and \
                    not self._thread_finished(thread):
                sleep(self.heartbeat)

            # Allow individual folder sync monitors to shut themselves down
            # after completing the initial sync.
            if self._thread_finished(thread):
                self.log.info('folder sync finished')
                # NOTE: Greenlet is automatically removed from the group
                # after finishing.

    self.folder_monitors.join()
def __init__(self, account_id, namespace_id, folder_name, email_address,
             provider_name, syncmanager_lock, sync_signal):
    """Set up the sync engine for one folder of one account.

    The ``Folder`` row matching ``folder_name`` and ``account_id`` is
    loaded to record its id, canonical name and initial-sync markers. The
    remaining arguments are stored on the instance, the state-handler
    table is built, heartbeats are set up and the ``Greenlet`` base is
    initialised.

    Raises:
        MailsyncError: if no ``Folder`` row matches the folder name and
            account.
    """
    with session_scope(namespace_id) as db_session:
        folder_query = db_session.query(Folder).filter(
            Folder.name == folder_name,
            Folder.account_id == account_id)
        try:
            folder = folder_query.one()
        except NoResultFound:
            raise MailsyncError(
                u"Missing Folder '{}' on account {}".format(
                    folder_name, account_id))

        self.folder_id = folder.id
        self.folder_role = folder.canonical_name
        # Metric flags for sync performance
        self.is_initial_sync = folder.initial_sync_end is None
        self.is_first_sync = folder.initial_sync_start is None
        self.is_first_message = self.is_first_sync

    bind_context(self, 'foldersyncengine', account_id, self.folder_id)

    self.account_id = account_id
    self.namespace_id = namespace_id
    self.folder_name = folder_name
    self.email_address = email_address

    # A folder named "inbox" (any letter case) gets its own poll interval.
    self.poll_frequency = (
        INBOX_POLL_FREQUENCY
        if self.folder_name.lower() == 'inbox'
        else DEFAULT_POLL_FREQUENCY)

    self.syncmanager_lock = syncmanager_lock
    self.state = None
    self.provider_name = provider_name
    self.last_fast_refresh = None
    self.flags_fetch_results = {}
    self.conn_pool = connection_pool(self.account_id)
    self.sync_signal = sync_signal

    # Maps a sync state name to the method registered for it.
    self.state_handlers = {
        'initial': self.initial_sync,
        'initial uidinvalid': self.resync_uids,
        'poll': self.poll,
        'poll uidinvalid': self.resync_uids,
    }

    self.setup_heartbeats()
    Greenlet.__init__(self)

    # Some generic IMAP servers are throwing UIDVALIDITY
    # errors forever. Instead of resyncing those servers
    # ad vitam, we keep track of the number of consecutive
    # times we got such an error and bail out if it's higher than
    # MAX_UIDINVALID_RESYNCS.
    self.uidinvalid_count = 0
def initial_sync_impl(self, crispin_client):
    """Run the initial sync of this folder.

    Local and remote UIDs are compared, locally stored UIDs that are gone
    from the remote are removed, and the UID counts are recorded. A
    ``poll_for_changes`` greenlet is then spawned and the unknown UIDs are
    downloaded, either directly (UID-download folders) or by thread via
    the 'All Mail' folder (thread-expand folders).

    Raises:
        MailsyncError: if the folder is in neither folder list.
    """
    # Everything runs under try/finally because helper greenlets such as
    # the change poller must be killed when this greenlet is interrupted.
    change_poller = None
    try:
        with mailsync_session_scope() as db_session:
            local_uids = common.all_uids(self.account_id, db_session,
                                         self.folder_name)

        remote_uids = sorted(crispin_client.all_uids(), key=int)
        remote_uid_count = len(remote_uids)

        with self.syncmanager_lock, mailsync_session_scope() as db_session:
            deleted_uids = self.remove_deleted_uids(
                db_session, local_uids, remote_uids)
            local_uids = set(local_uids) - deleted_uids
            unknown_uids = set(remote_uids) - local_uids
            self.update_uid_counts(
                db_session,
                remote_uid_count=remote_uid_count,
                download_uid_count=len(unknown_uids))

        remote_g_metadata = crispin_client.g_metadata(unknown_uids)
        download_stack = UIDStack()
        change_poller = spawn(self.poll_for_changes, download_stack)

        if self.folder_name in uid_download_folders(crispin_client):
            full_download = self.__deduplicate_message_download(
                crispin_client, remote_g_metadata, unknown_uids)
            for uid in sorted(full_download):
                download_stack.put(uid, None)
            self.download_uids(crispin_client, download_stack)
            return

        if self.folder_name in thread_expand_folders(crispin_client):
            flags_by_uid = crispin_client.flags(unknown_uids)
            for uid in sorted(unknown_uids):
                if uid not in flags_by_uid:
                    continue
                uid_flags = flags_by_uid[uid]
                download_stack.put(
                    uid,
                    GMessage(uid, remote_g_metadata[uid], uid_flags.flags,
                             uid_flags.labels, throttled=self.throttled))
            # We always download threads via the 'All Mail' folder.
            all_mail_name = crispin_client.folder_names()['all']
            crispin_client.select_folder(all_mail_name, uidvalidity_cb)
            self.__download_queued_threads(crispin_client, download_stack)
            return

        raise MailsyncError('Unknown Gmail sync folder: {}'.format(
            self.folder_name))
    finally:
        if change_poller is not None:
            change_poller.kill()
def __init__(self, account_id, namespace_id, folder_name, email_address,
             provider_name, syncmanager_lock):
    """Set up the sync engine for one folder of one account.

    The ``Folder`` row matching ``folder_name`` and ``account_id`` is
    loaded to record its id, canonical name and initial-sync markers. The
    remaining arguments are stored on the instance, the state-handler
    table is built, the ``Greenlet`` base is initialised and a
    ``HeartbeatStatusProxy`` is created for this folder.

    Raises:
        MailsyncError: if no ``Folder`` row matches the folder name and
            account.
    """
    with session_scope(namespace_id) as db_session:
        folder_query = db_session.query(Folder).filter(
            Folder.name == folder_name,
            Folder.account_id == account_id)
        try:
            folder = folder_query.one()
        except NoResultFound:
            raise MailsyncError(
                u"Missing Folder '{}' on account {}".format(
                    folder_name, account_id))

        self.folder_id = folder.id
        self.folder_role = folder.canonical_name
        # Metric flags for sync performance
        self.is_initial_sync = folder.initial_sync_end is None
        self.is_first_sync = folder.initial_sync_start is None
        self.is_first_message = self.is_first_sync

    bind_context(self, 'foldersyncengine', account_id, self.folder_id)

    self.account_id = account_id
    self.namespace_id = namespace_id
    self.folder_name = folder_name

    # A folder named "inbox" (any letter case) gets its own poll interval.
    self.poll_frequency = (
        INBOX_POLL_FREQUENCY
        if self.folder_name.lower() == 'inbox'
        else DEFAULT_POLL_FREQUENCY)

    self.syncmanager_lock = syncmanager_lock
    self.state = None
    self.provider_name = provider_name
    self.last_fast_refresh = None
    self.flags_fetch_results = {}
    self.conn_pool = connection_pool(self.account_id)

    # Maps a sync state name to the method registered for it.
    self.state_handlers = {
        'initial': self.initial_sync,
        'initial uidinvalid': self.resync_uids,
        'poll': self.poll,
        'poll uidinvalid': self.resync_uids,
    }

    Greenlet.__init__(self)

    self.heartbeat_status = HeartbeatStatusProxy(
        self.account_id,
        self.folder_id,
        self.folder_name,
        email_address,
        self.provider_name)
def highestmodseq_callback(self, crispin_client, new_uids, updated_uids):
    """Download the messages behind a set of new and updated UIDs.

    The two UID lists are concatenated, their Gmail metadata is fetched and
    the result is passed through the message-deduplication helper. For the
    inbox folder the remaining UIDs that have both flags and metadata are
    queued as ``GMessage`` objects and downloaded by thread; for
    UID-download folders they are downloaded by UID.

    Raises:
        MailsyncError: if the folder is neither the inbox nor one of the
            UID-download folders.
    """
    changed_uids = new_uids + updated_uids
    metadata_by_uid = crispin_client.g_metadata(changed_uids)
    pending_uids = self.__deduplicate_message_download(
        crispin_client, metadata_by_uid, changed_uids)

    if self.folder_name == crispin_client.folder_names()['inbox']:
        flags_by_uid = crispin_client.flags(pending_uids)
        thread_stack = LifoQueue()
        for uid in pending_uids:
            # IMAP will just return no data for a UID if it's
            # disappeared from the folder in the meantime.
            if uid not in flags_by_uid or uid not in metadata_by_uid:
                continue
            uid_flags = flags_by_uid[uid]
            thread_stack.put(
                GMessage(uid, metadata_by_uid[uid], uid_flags.flags,
                         uid_flags.labels))
        self.__download_queued_threads(crispin_client, thread_stack)
        return

    if self.folder_name in uid_download_folders(crispin_client):
        self.download_uids(crispin_client, uid_list_to_stack(pending_uids))
        return

    raise MailsyncError('Unknown Gmail sync folder: {}'.format(
        self.folder_name))
def initial_sync_impl(self, crispin_client, local_uids, uid_download_stack):
    """Run the initial sync of this folder.

    Remote Gmail metadata is fetched, locally stored UIDs that are gone
    from the remote are removed, and the UID counts are recorded. A poller
    greenlet is then spawned and the unknown UIDs are downloaded, by thread
    for the inbox folder or by UID for UID-download folders. On success the
    cached remote metadata file is removed.

    Args:
        crispin_client: IMAP client used for all remote calls.
        local_uids: UIDs already stored locally for this folder.
        uid_download_stack: stack that receives the UIDs to download in
            the UID-download case.

    Raises:
        MailsyncError: if the folder is neither the inbox nor one of the
            UID-download folders.
    """
    # We wrap the block in a try/finally because the greenlets like
    # new_uid_poller need to be killed when this greenlet is interrupted.
    # The poller is only spawned part-way through, so start from None:
    # otherwise an earlier failure (or the "unknown folder" error) would
    # make the finally clause raise UnboundLocalError and hide the real
    # exception.
    new_uid_poller = None
    try:
        remote_uid_count = len(set(crispin_client.all_uids()))
        remote_g_metadata, update_uid_count = self.__fetch_g_metadata(
            crispin_client, local_uids)
        remote_uids = sorted(remote_g_metadata.keys(), key=int)
        log.info(remote_uid_count=len(remote_uids))
        if self.folder_name == crispin_client.folder_names()['all']:
            log.info(local_uid_count=len(local_uids))

        with self.syncmanager_lock:
            log.debug('gmail_initial_sync grabbed syncmanager_lock')
            with mailsync_session_scope() as db_session:
                deleted_uids = self.remove_deleted_uids(
                    db_session, local_uids, remote_uids)
                delete_uid_count = len(deleted_uids)

                local_uids = set(local_uids) - deleted_uids
                unknown_uids = set(remote_uids) - local_uids

                # Persist the num(messages) to sync (any type of sync:
                # download, update or delete) before we start. Note that
                # num_local_deleted, num_local_updated ARE the numbers to
                # delete/update too since we make those changes right away
                # before we start downloading messages.
                self.update_uid_counts(
                    db_session, remote_uid_count=remote_uid_count,
                    download_uid_count=len(unknown_uids),
                    update_uid_count=update_uid_count,
                    delete_uid_count=delete_uid_count)

        if self.folder_name == crispin_client.folder_names()['inbox']:
            # We don't do an initial dedupe for Inbox because we do thread
            # expansion, which means even if we have a given msgid
            # downloaded, we might not have the whole thread. This means
            # that restarts cause duplicate work, but hopefully these
            # folders aren't too huge.
            message_download_stack = LifoQueue()
            flags = crispin_client.flags(unknown_uids)
            for uid in unknown_uids:
                if uid in flags:
                    message_download_stack.put(
                        GMessage(uid, remote_g_metadata[uid],
                                 flags[uid].flags, flags[uid].labels))
            new_uid_poller = spawn(self.__check_new_g_thrids,
                                   message_download_stack)
            self.__download_queued_threads(crispin_client,
                                           message_download_stack)
        elif self.folder_name in uid_download_folders(crispin_client):
            full_download = self.__deduplicate_message_download(
                crispin_client, remote_g_metadata, unknown_uids)
            add_uids_to_stack(full_download, uid_download_stack)
            new_uid_poller = spawn(self.check_new_uids, uid_download_stack)
            self.download_uids(crispin_client, uid_download_stack)
        else:
            raise MailsyncError('Unknown Gmail sync folder: {}'.format(
                self.folder_name))

        # Complete X-GM-MSGID mapping is no longer needed after initial
        # sync.
        rm_cache(
            remote_g_metadata_cache_file(self.account_id, self.folder_name))
    finally:
        if new_uid_poller is not None:
            new_uid_poller.kill()
def gmail_initial_sync(crispin_client, log, folder_name, shared_state,
                       local_uids, uid_download_stack, msg_create_fn):
    """Run the initial sync of a Gmail folder.

    Remote Gmail metadata is fetched, locally stored UIDs that are gone
    from the remote are removed, and the UID counts are recorded. A poller
    greenlet is then spawned and the unknown UIDs are downloaded, by thread
    for the inbox folder or by UID for UID-download folders. On success the
    cached remote metadata file is removed.

    Args:
        crispin_client: IMAP client used for all remote calls.
        log: logger passed on to the helper functions.
        folder_name: name of the folder being synced.
        shared_state: mapping read for ``'syncmanager_lock'`` and
            ``'poll_frequency'``.
        local_uids: UIDs already stored locally for this folder.
        uid_download_stack: stack that receives the UIDs to download in
            the UID-download case.
        msg_create_fn: message-creation callable handed to
            ``download_queued_uids``.

    Raises:
        MailsyncError: if ``folder_name`` is neither the inbox nor one of
            the UID-download folders.
    """
    remote_uid_count = len(set(crispin_client.all_uids()))
    remote_g_metadata, update_uid_count = get_g_metadata(
        crispin_client, log, folder_name, local_uids,
        shared_state['syncmanager_lock'])
    remote_uids = sorted(remote_g_metadata.keys(), key=int)
    log.info(remote_uid_count=len(remote_uids))
    if folder_name == crispin_client.folder_names()['all']:
        log.info(local_uid_count=len(local_uids))

    with shared_state['syncmanager_lock']:
        log.debug('gmail_initial_sync grabbed syncmanager_lock')
        with session_scope(ignore_soft_deletes=False) as db_session:
            deleted_uids = remove_deleted_uids(
                crispin_client.account_id, db_session, log, folder_name,
                local_uids, remote_uids)
            delete_uid_count = len(deleted_uids)

            local_uids = set(local_uids) - deleted_uids
            unknown_uids = set(remote_uids) - local_uids

            # Persist the num(messages) to sync (any type of sync:
            # download, update or delete) before we start.
            # Note that num_local_deleted, num_local_updated ARE the
            # numbers to delete/update too since we make those changes
            # right away before we start downloading messages.
            update_uid_counts(db_session, log, crispin_client.account_id,
                              folder_name,
                              remote_uid_count=remote_uid_count,
                              download_uid_count=len(unknown_uids),
                              update_uid_count=update_uid_count,
                              delete_uid_count=delete_uid_count)

    # The poller greenlet must be killed on failure as well as on success,
    # otherwise it keeps running after the download has raised. It is only
    # spawned inside the branches below, hence the None guard.
    new_uid_poller = None
    try:
        if folder_name == crispin_client.folder_names()['inbox']:
            # We don't do an initial dedupe for Inbox because we do thread
            # expansion, which means even if we have a given msgid
            # downloaded, we might not have the whole thread. This means
            # that restarts cause duplicate work, but hopefully these
            # folders aren't too huge.
            message_download_stack = LifoQueue()
            flags = crispin_client.flags(unknown_uids)
            for uid in unknown_uids:
                if uid in flags:
                    message_download_stack.put(
                        GMessage(uid, remote_g_metadata[uid],
                                 flags[uid].flags, flags[uid].labels))
            new_uid_poller = spawn(check_new_g_thrids,
                                   crispin_client.account_id,
                                   crispin_client.PROVIDER, folder_name,
                                   log, message_download_stack,
                                   shared_state['poll_frequency'],
                                   shared_state['syncmanager_lock'])
            download_queued_threads(crispin_client, log, folder_name,
                                    message_download_stack,
                                    shared_state['syncmanager_lock'])
        elif folder_name in uid_download_folders(crispin_client):
            full_download = deduplicate_message_download(
                crispin_client, log, shared_state['syncmanager_lock'],
                remote_g_metadata, unknown_uids)
            add_uids_to_stack(full_download, uid_download_stack)
            new_uid_poller = spawn(check_new_uids,
                                   crispin_client.account_id, folder_name,
                                   log, uid_download_stack,
                                   shared_state['poll_frequency'],
                                   shared_state['syncmanager_lock'])
            download_queued_uids(crispin_client, log, folder_name,
                                 uid_download_stack, len(local_uids),
                                 len(unknown_uids),
                                 shared_state['syncmanager_lock'],
                                 gmail_download_and_commit_uids,
                                 msg_create_fn)
        else:
            raise MailsyncError(
                'Unknown Gmail sync folder: {}'.format(folder_name))

        # Complete X-GM-MSGID mapping is no longer needed after initial
        # sync.
        rm_cache(remote_g_metadata_cache_file(crispin_client.account_id,
                                              folder_name))
    finally:
        if new_uid_poller is not None:
            new_uid_poller.kill()