def condstore_refresh_flags(self, crispin_client):
    """Apply flag changes and expunges signalled by a new HIGHESTMODSEQ.

    Reads the folder's HIGHESTMODSEQ via ``folder_status`` and compares it
    with ``self.highestmodseq``.  Nothing is persisted when the value is
    unchanged or (unexpectedly) lower than the saved one.  Otherwise the
    folder is selected, the flags changed since the saved value are
    written through ``common.update_metadata``, local UIDs that are no
    longer present remotely are removed, and the saved value is advanced.

    :param crispin_client: client used for all remote (IMAP) calls.
    """
    status = crispin_client.conn.folder_status(
        self.folder_name, ['HIGHESTMODSEQ'])
    server_modseq = status['HIGHESTMODSEQ']

    # First poll: adopt the server value as our baseline so that later
    # polls have something to compare against.
    if self.highestmodseq is None:
        self.highestmodseq = server_modseq

    if server_modseq == self.highestmodseq:
        # Nothing changed on the server since the last poll.
        return

    if server_modseq < self.highestmodseq:
        # Not expected to happen; log it and leave local state alone.
        log.warning('got server highestmodseq less than saved '
                    'highestmodseq',
                    new_highestmodseq=server_modseq,
                    saved_highestmodseq=self.highestmodseq)
        return

    # The server value moved forward: pull the changes.
    crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)
    flag_updates = crispin_client.condstore_changed_flags(
        self.highestmodseq)
    server_uids = crispin_client.all_uids()

    with session_scope() as db_session:
        common.update_metadata(self.account_id, self.folder_id,
                               flag_updates, db_session)
        stored_uids = common.local_uids(self.account_id, db_session,
                                        self.folder_id)
        vanished_uids = set(stored_uids).difference(server_uids)
        common.remove_deleted_uids(self.account_id, self.folder_id,
                                   vanished_uids, db_session)
        db_session.commit()

    self.highestmodseq = server_modseq
def refresh_flags_impl(self, crispin_client, max_uids):
    """Refresh flags for up to ``max_uids`` locally known UIDs.

    The ``(local_uids, flags)`` pair from the previous refresh is kept in
    ``self.flags_fetch_results`` keyed by ``max_uids``; when the new pair
    compares equal, nothing is written.

    :param crispin_client: client used for the remote flags fetch.
    :param max_uids: passed as ``limit`` to ``common.local_uids`` and used
        as the cache key for the last fetch result.
    """
    crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)

    with session_scope(self.namespace_id) as db_session:
        local_uids = common.local_uids(account_id=self.account_id,
                                       session=db_session,
                                       folder_id=self.folder_id,
                                       limit=max_uids)

    flags = crispin_client.flags(local_uids)
    fetch_result = (local_uids, flags)

    same_as_last_time = (
        max_uids in self.flags_fetch_results and
        self.flags_fetch_results[max_uids] == fetch_result)
    if same_as_last_time:
        # Identical to the previous response, so there is nothing new to
        # persist.
        log.debug('Unchanged flags refresh response, '
                  'not persisting changes', max_uids=max_uids)
        return

    log.debug('Changed flags refresh response, persisting changes',
              max_uids=max_uids)

    # UIDs we asked about but got no flags for are treated as expunged.
    missing_uids = set(local_uids).difference(flags.keys())
    common.remove_deleted_uids(self.account_id, self.folder_id,
                               missing_uids)

    with session_scope(self.namespace_id) as db_session:
        common.update_metadata(self.account_id, self.folder_id,
                               self.folder_role, flags, db_session)

    self.flags_fetch_results[max_uids] = fetch_result
def initial_sync_impl(self, crispin_client):
    """Run the initial download of every UID not yet stored locally.

    Steps, in order:

    1. Remove local UIDs that no longer exist remotely and record the
       remote/to-download counts (under ``self.syncmanager_lock``).
    2. Spawn ``self.poll_for_changes`` as a background greenlet.
    3. Download the unknown UIDs in chunks of 1024, highest UID first.
       When ``self.is_all_mail(crispin_client)`` is true, UIDs labelled
       "inbox" are downloaded before all others.

    The change-poller greenlet is always killed when this method exits,
    including when this greenlet is interrupted.

    :param crispin_client: client used for all remote (IMAP) calls.
    """
    # We wrap the block in a try/finally because the greenlets like
    # change_poller need to be killed when this greenlet is interrupted
    change_poller = None
    try:
        remote_uids = sorted(crispin_client.all_uids(), key=int)
        remote_uid_set = set(remote_uids)
        with self.syncmanager_lock:
            with session_scope(self.namespace_id) as db_session:
                # Normalise to a set once: the `-` operator below requires
                # set operands on both sides, whatever container
                # common.local_uids happens to return.
                local_uid_set = set(
                    common.local_uids(
                        self.account_id, db_session, self.folder_id
                    )
                )
            common.remove_deleted_uids(
                self.account_id, self.folder_id,
                local_uid_set - remote_uid_set
            )
            unknown_uids = remote_uid_set - local_uid_set
            with session_scope(self.namespace_id) as db_session:
                self.update_uid_counts(
                    db_session,
                    remote_uid_count=len(remote_uids),
                    download_uid_count=len(unknown_uids),
                )

        change_poller = gevent.spawn(self.poll_for_changes)
        bind_context(change_poller, "changepoller", self.account_id,
                     self.folder_id)

        if self.is_all_mail(crispin_client):
            # Prioritize UIDs for messages in the inbox folder.
            if len(remote_uids) < 1e6:
                inbox_uids = set(
                    crispin_client.search_uids(["X-GM-LABELS", "inbox"])
                )
            else:
                # The search above is really slow (times out) on really
                # large mailboxes, so bound the search to messages within
                # the past month in order to get anywhere.
                since = datetime.utcnow() - timedelta(days=30)
                inbox_uids = set(
                    crispin_client.search_uids(
                        ["X-GM-LABELS", "inbox", "SINCE", since]
                    )
                )
            # The list is consumed in reverse below, so inbox UIDs go
            # last here in order to be downloaded first.
            uids_to_download = sorted(unknown_uids - inbox_uids) + sorted(
                unknown_uids & inbox_uids
            )
        else:
            uids_to_download = sorted(unknown_uids)

        for uids in chunk(reversed(uids_to_download), 1024):
            g_metadata = crispin_client.g_metadata(uids)
            # UIDs might have been expunged since sync started, in which
            # case the g_metadata call above will return nothing.
            # They may also have been preemptively downloaded by thread
            # expansion. We can omit such UIDs.
            uids = [u for u in uids
                    if u in g_metadata and u not in self.saved_uids]
            self.batch_download_uids(crispin_client, uids, g_metadata)
    finally:
        if change_poller is not None:
            # schedule change_poller to die
            gevent.kill(change_poller)
def refresh_flags_impl(self, crispin_client, max_uids):
    """Detect deleted messages, then refresh flags for recent UIDs.

    First compares every local UID against the remote UID list and
    removes the ones that have disappeared.  Then fetches flags for up to
    ``max_uids`` local UIDs; if that response equals the one cached in
    ``self.flags_fetch_results[max_uids]`` nothing more is written,
    otherwise UIDs missing from the response are removed, the flags are
    persisted and the cache entry is replaced.

    :param crispin_client: client used for all remote (IMAP) calls.
    :param max_uids: passed as ``limit`` to ``common.local_uids`` and used
        as the cache key for the last fetch result.
    """
    crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)

    # Pass 1: look for messages deleted on the server.
    server_uids = crispin_client.all_uids()
    with session_scope(self.namespace_id) as db_session:
        local_uids = common.local_uids(self.account_id, db_session,
                                       self.folder_id)
    expunged_uids = set(local_uids).difference(server_uids)
    if expunged_uids:
        with self.syncmanager_lock:
            common.remove_deleted_uids(self.account_id, self.folder_id,
                                       expunged_uids)

    # Pass 2: pick the recent UIDs whose flags we monitor.
    with session_scope(self.namespace_id) as db_session:
        local_uids = common.local_uids(
            account_id=self.account_id,
            session=db_session,
            folder_id=self.folder_id,
            limit=max_uids,
        )

    flags = crispin_client.flags(local_uids)
    fetch_result = (local_uids, flags)

    cache = self.flags_fetch_results
    if max_uids in cache and cache[max_uids] == fetch_result:
        # Same response as last time, so there is nothing to persist.
        # This case is intentionally not logged, to keep overall logging
        # volume down.
        return

    log.debug("Changed flags refresh response, persisting changes",
              max_uids=max_uids)

    expunged_uids = set(local_uids).difference(flags.keys())
    with self.syncmanager_lock:
        common.remove_deleted_uids(self.account_id, self.folder_id,
                                   expunged_uids)

    with self.syncmanager_lock, \
            session_scope(self.namespace_id) as db_session:
        common.update_metadata(self.account_id, self.folder_id,
                               self.folder_role, flags, db_session)

    self.flags_fetch_results[max_uids] = fetch_result
def initial_sync_impl(self, crispin_client):
    """Download every remote UID that is missing locally.

    Local UIDs that are gone from the server are removed first and the
    remote/download counts are recorded, all while holding
    ``self.syncmanager_lock``.  A ``poll_for_changes`` greenlet is then
    spawned and the missing UIDs are fetched in chunks of 1024, from the
    highest UID down.  When ``self.is_all_mail(crispin_client)`` is true,
    UIDs carrying the "inbox" label are fetched ahead of the rest.

    The spawned greenlet is killed in ``finally`` so that it does not
    outlive this method, even when this greenlet is interrupted.

    :param crispin_client: client used for all remote (IMAP) calls.
    """
    # We wrap the block in a try/finally because the greenlets like
    # change_poller need to be killed when this greenlet is interrupted
    change_poller = None
    try:
        remote_uids = sorted(crispin_client.all_uids(), key=int)
        remote_uid_set = set(remote_uids)
        with self.syncmanager_lock:
            with session_scope(self.namespace_id) as db_session:
                # Build the set once so both differences below work no
                # matter what container common.local_uids returns; the
                # `-` operator only accepts sets.
                local_uid_set = set(
                    common.local_uids(self.account_id, db_session,
                                      self.folder_id))
            common.remove_deleted_uids(
                self.account_id, self.folder_id,
                local_uid_set - remote_uid_set)
            unknown_uids = remote_uid_set - local_uid_set
            with session_scope(self.namespace_id) as db_session:
                self.update_uid_counts(
                    db_session,
                    remote_uid_count=len(remote_uids),
                    download_uid_count=len(unknown_uids))

        change_poller = spawn(self.poll_for_changes)
        bind_context(change_poller, 'changepoller', self.account_id,
                     self.folder_id)

        if self.is_all_mail(crispin_client):
            # Prioritize UIDs for messages in the inbox folder.
            if len(remote_uids) < 1e6:
                inbox_uids = set(
                    crispin_client.search_uids(['X-GM-LABELS', 'inbox']))
            else:
                # The search above is really slow (times out) on really
                # large mailboxes, so bound the search to messages within
                # the past month in order to get anywhere.
                since = datetime.utcnow() - timedelta(days=30)
                inbox_uids = set(crispin_client.search_uids([
                    'X-GM-LABELS', 'inbox', 'SINCE', since]))
            # Iterated in reverse below: placing inbox UIDs at the tail
            # makes them the first ones downloaded.
            uids_to_download = (sorted(unknown_uids - inbox_uids) +
                                sorted(unknown_uids & inbox_uids))
        else:
            uids_to_download = sorted(unknown_uids)

        for uids in chunk(reversed(uids_to_download), 1024):
            g_metadata = crispin_client.g_metadata(uids)
            # UIDs might have been expunged since sync started, in which
            # case the g_metadata call above will return nothing.
            # They may also have been preemptively downloaded by thread
            # expansion. We can omit such UIDs.
            uids = [u for u in uids
                    if u in g_metadata and u not in self.saved_uids]
            self.batch_download_uids(crispin_client, uids, g_metadata)
    finally:
        if change_poller is not None:
            # schedule change_poller to die
            kill(change_poller)
def refresh_flags_impl(self, crispin_client, max_uids):
    """Fetch flags for up to ``max_uids`` local UIDs and persist them.

    Local UIDs for which the server returns no flags are removed through
    ``common.remove_deleted_uids``; the returned flags are then written
    through ``common.update_metadata``.

    :param crispin_client: client used for the remote flags fetch.
    :param max_uids: passed as ``limit`` to ``common.local_uids``.
    """
    crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)

    with session_scope(self.namespace_id) as db_session:
        tracked_uids = common.local_uids(
            account_id=self.account_id,
            session=db_session,
            folder_id=self.folder_id,
            limit=max_uids,
        )

    flags = crispin_client.flags(tracked_uids)

    # Anything we asked about that is absent from the response is treated
    # as expunged.
    missing_uids = set(tracked_uids).difference(flags.keys())
    common.remove_deleted_uids(self.account_id, self.folder_id,
                               missing_uids)

    with session_scope(self.namespace_id) as db_session:
        common.update_metadata(self.account_id, self.folder_id, flags,
                               db_session)
def initial_sync_impl(self, crispin_client):
    """Download all not-yet-stored UIDs one at a time, newest first.

    Expects the client to already have ``self.folder_name`` selected.
    Stale local UIDs are removed under ``self.syncmanager_lock``, the UID
    counts are recorded, and ``self.poll_for_changes`` is spawned as a
    background greenlet.  For accounts flagged ``throttled``, each
    download from the ``THROTTLE_COUNT``-th onward is followed by a sleep
    of ``THROTTLE_WAIT``.

    The spawned greenlet is killed on exit, whatever the reason.

    :param crispin_client: client used for all remote (IMAP) calls.
    """
    # The try/finally guarantees the change_poller greenlet is killed
    # when this greenlet gets interrupted.
    change_poller = None
    try:
        assert crispin_client.selected_folder_name == self.folder_name
        remote_uids = crispin_client.all_uids()

        with self.syncmanager_lock:
            with session_scope(self.namespace_id) as db_session:
                local_uids = common.local_uids(self.account_id, db_session,
                                               self.folder_id)
            stale_uids = set(local_uids).difference(remote_uids)
            common.remove_deleted_uids(
                self.account_id,
                self.folder_id,
                stale_uids,
            )

        new_uids = set(remote_uids).difference(local_uids)
        with session_scope(self.namespace_id) as db_session:
            account = db_session.query(Account).get(self.account_id)
            throttled = account.throttled
            self.update_uid_counts(
                db_session,
                remote_uid_count=len(remote_uids),
                # Initial size of the download queue.
                download_uid_count=len(new_uids),
            )

        change_poller = gevent.spawn(self.poll_for_changes)
        bind_context(change_poller, "changepoller", self.account_id,
                     self.folder_id)

        pending = sorted(new_uids, reverse=True)
        for count, uid in enumerate(pending, 1):
            # Batching shows less of a speedup for non-Gmail accounts, so
            # for now each UID is downloaded on its own.
            self.download_and_commit_uids(crispin_client, [uid])
            self.heartbeat_status.publish()
            if throttled and count >= THROTTLE_COUNT:
                # Throttled accounts' folders sync at a rate of
                # 1 message/ minute, after the first approx. THROTTLE_COUNT
                # messages per folder are synced.
                # Note this is an approx. limit since we use the #(uids),
                # not the #(messages).
                gevent.sleep(THROTTLE_WAIT)
    finally:
        if change_poller is not None:
            # schedule change_poller to die
            gevent.kill(change_poller)
def condstore_refresh_flags(self, crispin_client):
    """Apply flag changes and expunges signalled by a new HIGHESTMODSEQ.

    Compares the folder's HIGHESTMODSEQ with ``self.highestmodseq`` and
    returns early when it is unchanged or lower.  Otherwise the changed
    flags are persisted and local UIDs missing from the server are
    removed.  Before removing anything, ``self.get_new_uids`` is called
    if the server holds UIDs above the last one seen locally.

    :param crispin_client: client used for all remote (IMAP) calls.
    """
    status = crispin_client.conn.folder_status(
        self.folder_name, ['HIGHESTMODSEQ'])
    server_modseq = status['HIGHESTMODSEQ']

    # Make sure a baseline value is stored before polling for changes.
    if self.highestmodseq is None:
        self.highestmodseq = server_modseq

    if server_modseq == self.highestmodseq:
        # Unchanged: nothing to do.
        return

    if server_modseq < self.highestmodseq:
        # Should never happen; log it and bail out.
        log.warning(
            'got server highestmodseq less than saved '
            'highestmodseq',
            new_highestmodseq=server_modseq,
            saved_highestmodseq=self.highestmodseq)
        return

    # The value has advanced: fetch and apply what changed.
    crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)
    flag_updates = crispin_client.condstore_changed_flags(
        self.highestmodseq)
    remote_uids = crispin_client.all_uids()

    with session_scope(self.namespace_id) as db_session:
        common.update_metadata(self.account_id, self.folder_id,
                               flag_updates, db_session)
        stored_uids = common.local_uids(self.account_id, db_session,
                                        self.folder_id)
        vanished_uids = set(stored_uids).difference(remote_uids)

    if vanished_uids:
        # If new UIDs have appeared since we last checked in
        # get_new_uids, save them first. We want to always have the
        # latest UIDs before expunging anything, in order to properly
        # capture draft revisions.
        with session_scope(self.namespace_id) as db_session:
            lastseenuid = common.lastseenuid(self.account_id, db_session,
                                             self.folder_id)
        if remote_uids and lastseenuid < max(remote_uids):
            log.info('Downloading new UIDs before expunging')
            self.get_new_uids(crispin_client)

        with session_scope(self.namespace_id) as db_session:
            common.remove_deleted_uids(self.account_id, self.folder_id,
                                       vanished_uids, db_session)
            db_session.commit()

    self.highestmodseq = server_modseq
def refresh_flags_impl(self, crispin_client, max_uids):
    """Fetch flags for up to ``max_uids`` local UIDs and persist them.

    Removal of UIDs absent from the flags response and the metadata
    update are both issued against the same database session.

    :param crispin_client: client used for the remote flags fetch.
    :param max_uids: passed as ``limit`` to ``common.local_uids``.
    """
    crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)

    with session_scope(self.namespace_id) as db_session:
        tracked_uids = common.local_uids(account_id=self.account_id,
                                         session=db_session,
                                         folder_id=self.folder_id,
                                         limit=max_uids)

    flags = crispin_client.flags(tracked_uids)

    # UIDs the server returned no flags for are treated as expunged.
    missing_uids = set(tracked_uids).difference(flags.keys())

    with session_scope(self.namespace_id) as db_session:
        common.remove_deleted_uids(self.account_id, self.folder_id,
                                   missing_uids, db_session)
        common.update_metadata(self.account_id, self.folder_id, flags,
                               db_session)
def condstore_refresh_flags(self, crispin_client):
    """Sync flag changes and deletions when HIGHESTMODSEQ has advanced.

    Returns without touching stored data when the server's HIGHESTMODSEQ
    equals the saved one, or is lower (which is logged as a warning).
    Otherwise: persist the flags changed since the saved value, work out
    which local UIDs are gone from the server, download any newer UIDs
    first if needed, remove the vanished ones, and save the new value.

    :param crispin_client: client used for all remote (IMAP) calls.
    """
    latest_modseq = crispin_client.conn.folder_status(
        self.folder_name, ['HIGHESTMODSEQ'])['HIGHESTMODSEQ']

    # Store an initial value before we start polling for changes.
    if self.highestmodseq is None:
        self.highestmodseq = latest_modseq

    saved_modseq = self.highestmodseq
    if latest_modseq == saved_modseq:
        # No change since last time.
        return
    elif latest_modseq < saved_modseq:
        # Not supposed to happen, but handle it if it does.
        log.warning('got server highestmodseq less than saved '
                    'highestmodseq',
                    new_highestmodseq=latest_modseq,
                    saved_highestmodseq=saved_modseq)
        return

    # HIGHESTMODSEQ moved forward; update accordingly.
    crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)
    changed = crispin_client.condstore_changed_flags(self.highestmodseq)
    remote_uids = crispin_client.all_uids()

    with session_scope(self.namespace_id) as db_session:
        common.update_metadata(self.account_id, self.folder_id, changed,
                               db_session)
        known_uids = common.local_uids(self.account_id, db_session,
                                       self.folder_id)
        gone_uids = set(known_uids).difference(remote_uids)

    if not gone_uids:
        self.highestmodseq = latest_modseq
        return

    # If new UIDs have appeared since we last checked in get_new_uids,
    # save them first. We want to always have the latest UIDs before
    # expunging anything, in order to properly capture draft revisions.
    with session_scope(self.namespace_id) as db_session:
        lastseenuid = common.lastseenuid(self.account_id, db_session,
                                         self.folder_id)
    if remote_uids and lastseenuid < max(remote_uids):
        log.info('Downloading new UIDs before expunging')
        self.get_new_uids(crispin_client)

    with session_scope(self.namespace_id) as db_session:
        common.remove_deleted_uids(self.account_id, self.folder_id,
                                   gone_uids, db_session)
        db_session.commit()

    self.highestmodseq = latest_modseq
def initial_sync_impl(self, crispin_client):
    """Download all not-yet-stored UIDs one at a time, newest first.

    Expects the client to already have ``self.folder_name`` selected.
    Local UIDs missing from the server are removed (in the same session
    that listed them, under ``self.syncmanager_lock``), UID counts are
    recorded, and ``self.poll_for_changes`` is spawned.  For accounts
    flagged ``throttled``, each download from the ``THROTTLE_COUNT``-th
    onward is followed by a sleep of ``THROTTLE_WAIT``.

    The spawned greenlet is killed on exit, whatever the reason.

    :param crispin_client: client used for all remote (IMAP) calls.
    """
    # The try/finally guarantees the change_poller greenlet is killed
    # when this greenlet gets interrupted.
    change_poller = None
    try:
        assert crispin_client.selected_folder_name == self.folder_name
        remote_uids = crispin_client.all_uids()

        with self.syncmanager_lock:
            with session_scope(self.namespace_id) as db_session:
                local_uids = common.local_uids(self.account_id, db_session,
                                               self.folder_id)
                stale_uids = set(local_uids).difference(remote_uids)
                common.remove_deleted_uids(
                    self.account_id, self.folder_id, stale_uids,
                    db_session)

        new_uids = set(remote_uids).difference(local_uids)
        with session_scope(self.namespace_id) as db_session:
            account = db_session.query(Account).get(self.account_id)
            throttled = account.throttled
            self.update_uid_counts(
                db_session,
                remote_uid_count=len(remote_uids),
                # Initial size of the download queue.
                download_uid_count=len(new_uids))

        change_poller = spawn(self.poll_for_changes)
        bind_context(change_poller, 'changepoller', self.account_id,
                     self.folder_id)

        pending = sorted(new_uids, reverse=True)
        for count, uid in enumerate(pending, 1):
            # Batching shows less of a speedup for non-Gmail accounts, so
            # for now each UID is downloaded on its own.
            self.download_and_commit_uids(crispin_client, [uid])
            self.heartbeat_status.publish()
            if throttled and count >= THROTTLE_COUNT:
                # Throttled accounts' folders sync at a rate of
                # 1 message/ minute, after the first approx. THROTTLE_COUNT
                # messages per folder are synced.
                # Note this is an approx. limit since we use the #(uids),
                # not the #(messages).
                sleep(THROTTLE_WAIT)
    finally:
        if change_poller is not None:
            # schedule change_poller to die
            kill(change_poller)
def initial_sync_impl(self, crispin_client):
    """Download all not-yet-stored UIDs one at a time, newest first.

    Expects the client to already have ``self.folder_name`` selected.
    Local UIDs missing from the server are removed under
    ``self.syncmanager_lock``, the UID counts are recorded, and
    ``self.poll_for_changes`` is spawned as a background greenlet that is
    killed again when this method exits.

    :param crispin_client: client used for all remote (IMAP) calls.
    """
    # The try/finally guarantees the change_poller greenlet is killed
    # when this greenlet gets interrupted.
    change_poller = None
    try:
        assert crispin_client.selected_folder_name == self.folder_name
        remote_uids = crispin_client.all_uids()

        with self.syncmanager_lock:
            with session_scope() as db_session:
                local_uids = common.local_uids(self.account_id, db_session,
                                               self.folder_id)
                stale_uids = set(local_uids).difference(remote_uids)
                common.remove_deleted_uids(
                    self.account_id, self.folder_id, stale_uids,
                    db_session)

        new_uids = set(remote_uids).difference(local_uids)
        with session_scope() as db_session:
            self.update_uid_counts(
                db_session,
                remote_uid_count=len(remote_uids),
                # Initial size of the download queue.
                download_uid_count=len(new_uids))

        change_poller = spawn(self.poll_for_changes)
        bind_context(change_poller, 'changepoller', self.account_id,
                     self.folder_id)

        for uid in sorted(new_uids, reverse=True):
            # Batching shows less of a speedup for non-Gmail accounts, so
            # for now each UID is downloaded on its own.
            self.download_and_commit_uids(crispin_client, [uid])
            self.heartbeat_status.publish()
    finally:
        if change_poller is not None:
            # schedule change_poller to die
            kill(change_poller)
def initial_sync_impl(self, crispin_client):
    """Perform the initial per-UID download for the selected folder.

    Asserts that the client has ``self.folder_name`` selected, reconciles
    local UIDs against the server (removing those no longer present),
    records the remote and to-download counts, starts the
    ``poll_for_changes`` greenlet, and then downloads each missing UID
    individually in descending UID order, publishing a heartbeat after
    each one.

    :param crispin_client: client used for all remote (IMAP) calls.
    """
    # try/finally: the change poller must not outlive this greenlet, even
    # when this greenlet is interrupted.
    poller = None
    try:
        assert crispin_client.selected_folder_name == self.folder_name
        server_uids = crispin_client.all_uids()

        with self.syncmanager_lock:
            with session_scope() as db_session:
                stored_uids = common.local_uids(
                    self.account_id, db_session, self.folder_id)
                common.remove_deleted_uids(
                    self.account_id, self.folder_id,
                    set(stored_uids).difference(server_uids),
                    db_session)

        missing_uids = set(server_uids).difference(stored_uids)
        with session_scope() as db_session:
            # download_uid_count is the initial size of our download queue.
            self.update_uid_counts(
                db_session,
                remote_uid_count=len(server_uids),
                download_uid_count=len(missing_uids))

        poller = spawn(self.poll_for_changes)
        bind_context(poller, 'changepoller', self.account_id,
                     self.folder_id)

        download_order = sorted(missing_uids, reverse=True)
        for uid in download_order:
            # The speedup from batching appears to be less clear for
            # non-Gmail accounts, so just download one-at-a-time.
            self.download_and_commit_uids(crispin_client, [uid])
            self.heartbeat_status.publish()
    finally:
        if poller is not None:
            # Schedule the change poller to die.
            kill(poller)
def condstore_refresh_flags(self, crispin_client):
    """Apply flag changes in batches when HIGHESTMODSEQ has advanced.

    Returns early when the server's HIGHESTMODSEQ equals the saved value,
    or is lower (logged as a warning).  Otherwise the flags changed since
    the saved value are persisted in ascending modseq order, in batches
    of ``CONDSTORE_FLAGS_REFRESH_BATCH_SIZE``, checkpointing
    ``self.highestmodseq`` after every full batch.  Local UIDs missing
    from the server are then removed, after first downloading any newer
    UIDs, and the final HIGHESTMODSEQ is saved.

    :param crispin_client: client used for all remote (IMAP) calls.
    """
    new_highestmodseq = crispin_client.conn.folder_status(
        self.folder_name, ['HIGHESTMODSEQ'])['HIGHESTMODSEQ']
    # Ensure that we have an initial highestmodseq value stored before we
    # begin polling for changes.
    if self.highestmodseq is None:
        self.highestmodseq = new_highestmodseq

    if new_highestmodseq == self.highestmodseq:
        # Don't need to do anything if the highestmodseq hasn't
        # changed.
        return
    elif new_highestmodseq < self.highestmodseq:
        # This should really never happen, but if it does, handle it.
        log.warning('got server highestmodseq less than saved '
                    'highestmodseq',
                    new_highestmodseq=new_highestmodseq,
                    saved_highestmodseq=self.highestmodseq)
        return

    log.info('HIGHESTMODSEQ has changed, getting changed UIDs',
             new_highestmodseq=new_highestmodseq,
             saved_highestmodseq=self.highestmodseq)
    crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)
    changed_flags = crispin_client.condstore_changed_flags(
        self.highestmodseq)
    remote_uids = crispin_client.all_uids()

    # In order to be able to sync changes to tens of thousands of flags at
    # once, we commit updates in batches. We do this in ascending order by
    # modseq and periodically "checkpoint" our saved highestmodseq. (It's
    # safe to checkpoint *because* we go in ascending order by modseq.)
    # That way if the process gets restarted halfway through this refresh,
    # we don't have to completely start over. It's also slow to load many
    # objects into the SQLAlchemy session and then issue lots of commits;
    # we avoid that by batching.
    #
    # The sort key indexes into each (uid, flags) item rather than using a
    # tuple-unpacking lambda, which is a syntax error on Python 3.
    flag_batches = chunk(
        sorted(changed_flags.items(), key=lambda item: item[1].modseq),
        CONDSTORE_FLAGS_REFRESH_BATCH_SIZE)
    for flag_batch in flag_batches:
        with session_scope(self.namespace_id) as db_session:
            common.update_metadata(self.account_id, self.folder_id,
                                   self.folder_role, dict(flag_batch),
                                   db_session)
        # Only a full batch is checkpointed here; a short (last) batch is
        # covered by the final assignment at the end of this method.
        if len(flag_batch) == CONDSTORE_FLAGS_REFRESH_BATCH_SIZE:
            interim_highestmodseq = max(v.modseq for _, v in flag_batch)
            self.highestmodseq = interim_highestmodseq

    with session_scope(self.namespace_id) as db_session:
        local_uids = common.local_uids(self.account_id, db_session,
                                       self.folder_id)
        expunged_uids = set(local_uids).difference(remote_uids)

    if expunged_uids:
        # If new UIDs have appeared since we last checked in
        # get_new_uids, save them first. We want to always have the
        # latest UIDs before expunging anything, in order to properly
        # capture draft revisions.
        with session_scope(self.namespace_id) as db_session:
            lastseenuid = common.lastseenuid(self.account_id, db_session,
                                             self.folder_id)
        if remote_uids and lastseenuid < max(remote_uids):
            log.info('Downloading new UIDs before expunging')
            self.get_new_uids(crispin_client)
        common.remove_deleted_uids(self.account_id, self.folder_id,
                                   expunged_uids)
    self.highestmodseq = new_highestmodseq