def update_metadata(account_id, folder_id, folder_role, new_flags, session):
    """
    Update flags and labels (the only metadata that can change).

    Make sure you're holding a db write lock on the account. (We don't try
    to grab the lock in here in case the caller needs to put higher-level
    functionality in the lock.)
    """
    if not new_flags:
        return
    account = Account.get(account_id, session)
    updated = 0
    matching_uids = session.query(ImapUid).filter(
        ImapUid.account_id == account_id,
        ImapUid.msg_uid.in_(new_flags.keys()),
        ImapUid.folder_id == folder_id)
    for uid in matching_uids:
        entry = new_flags[uid.msg_uid]
        # 'labels' only exists on Gmail-style flag entries.
        # TODO(emfree) refactor so this is only ever relevant for Gmail.
        gmail_labels = getattr(entry, 'labels', None)
        dirty = uid.update_flags(entry.flags)
        if gmail_labels is not None:
            uid.update_labels(gmail_labels)
            dirty = True
        if dirty:
            updated += 1
            # Only treat the message as a draft when it lives in a folder
            # where drafts can actually appear.
            draft = uid.is_draft and folder_role in ('drafts', 'all')
            update_message_metadata(session, account, uid.message, draft)
    session.commit()
    log.info('Updated UID metadata', changed=updated,
             out_of=len(new_flags))
def download_and_commit_uids(self, crispin_client, uids):
    """
    Download the given UIDs from the remote and commit them to the
    database.

    Returns the number of newly committed ImapUids. Previously one early
    exit returned ``None`` while another returned ``0`` and the success
    path returned ``None``; all paths now consistently return an int,
    which is backward compatible (both None and 0 are falsy).
    """
    start = datetime.utcnow()
    raw_messages = crispin_client.uids(uids)
    if not raw_messages:
        # Fixed: was a bare `return` (None) while the dedup branch below
        # returned 0.
        return 0
    new_uids = set()
    with self.syncmanager_lock:
        # NOTE(review): sibling revisions of this method call
        # session_scope(self.namespace_id) / session_scope(account_id) --
        # confirm which session_scope signature this revision targets.
        with session_scope() as db_session:
            account = Account.get(self.account_id, db_session)
            folder = Folder.get(self.folder_id, db_session)
            # Drop raw messages whose Message object already exists
            # (dedup by g_msgid); those only get a new ImapUid.
            raw_messages = self.__deduplicate_message_object_creation(
                db_session, raw_messages, account)
            if not raw_messages:
                return 0
            for msg in raw_messages:
                uid = self.create_message(db_session, account, folder, msg)
                if uid is not None:
                    db_session.add(uid)
                    # Commit per-UID so a crash mid-batch loses at most
                    # one message's worth of work.
                    db_session.commit()
                    new_uids.add(uid)
    log.info('Committed new UIDs',
             new_committed_message_count=len(new_uids))
    # If we downloaded uids, record message velocity (#uid / latency)
    if self.state == "initial" and len(new_uids):
        self._report_message_velocity(datetime.utcnow() - start,
                                      len(new_uids))
    if self.is_first_message:
        self._report_first_message()
        self.is_first_message = False
    self.saved_uids.update(new_uids)
    return len(new_uids)
def download_and_commit_uids(self, crispin_client, uids):
    """
    Download the given UIDs from the remote and commit them to the
    database.

    Returns the number of newly committed ImapUids. Previously one early
    exit returned ``None`` while another returned ``0`` and the success
    path returned ``None``; all paths now consistently return an int,
    which is backward compatible (both None and 0 are falsy).
    """
    start = datetime.utcnow()
    raw_messages = crispin_client.uids(uids)
    if not raw_messages:
        # Fixed: was a bare `return` (None) while the dedup branch below
        # returned 0.
        return 0
    new_uids = set()
    with self.syncmanager_lock:
        with session_scope(self.namespace_id) as db_session:
            account = Account.get(self.account_id, db_session)
            folder = Folder.get(self.folder_id, db_session)
            # Drop raw messages whose Message object already exists
            # (dedup by g_msgid); those only get a new ImapUid.
            raw_messages = self.__deduplicate_message_object_creation(
                db_session, raw_messages, account)
            if not raw_messages:
                return 0
            for msg in raw_messages:
                uid = self.create_message(db_session, account, folder, msg)
                if uid is not None:
                    db_session.add(uid)
                    # Commit per-UID so a crash mid-batch loses at most
                    # one message's worth of work.
                    db_session.commit()
                    new_uids.add(uid)
    log.info('Committed new UIDs',
             new_committed_message_count=len(new_uids))
    # If we downloaded uids, record message velocity (#uid / latency)
    if self.state == "initial" and len(new_uids):
        self._report_message_velocity(datetime.utcnow() - start,
                                      len(new_uids))
    if self.is_first_message:
        self._report_first_message()
        self.is_first_message = False
    self.saved_uids.update(new_uids)
    return len(new_uids)
def update_metadata(account_id, folder_id, new_flags, session):
    """
    Update flags and labels (the only metadata that can change).

    Make sure you're holding a db write lock on the account. (We don't try
    to grab the lock in here in case the caller needs to put higher-level
    functionality in the lock.)
    """
    if not new_flags:
        return
    account = Account.get(account_id, session)
    updated = 0
    matching_uids = session.query(ImapUid).filter(
        ImapUid.account_id == account_id,
        ImapUid.msg_uid.in_(new_flags.keys()),
        ImapUid.folder_id == folder_id)
    for uid in matching_uids:
        entry = new_flags[uid.msg_uid]
        # 'labels' only exists on Gmail-style flag entries.
        # TODO(emfree) refactor so this is only ever relevant for Gmail.
        gmail_labels = getattr(entry, 'labels', None)
        dirty = uid.update_flags(entry.flags)
        if gmail_labels is not None:
            uid.update_labels(gmail_labels)
            dirty = True
        if dirty:
            updated += 1
            update_message_metadata(session, account, uid.message,
                                    uid.is_draft)
    session.commit()
    log.info('Updated UID metadata', changed=updated,
             out_of=len(new_flags))
def download_and_commit_uids(self, crispin_client, uids):
    """
    Fetch the given UIDs from the remote, persist them, and return the
    number of new ImapUids committed.
    """
    fetch_started_at = datetime.utcnow()
    raw_messages = crispin_client.uids(uids)
    if not raw_messages:
        return 0

    committed = set()
    with self.syncmanager_lock:
        with session_scope(self.namespace_id) as db_session:
            account = Account.get(self.account_id, db_session)
            folder = Folder.get(self.folder_id, db_session)
            for raw in raw_messages:
                imapuid = self.create_message(db_session, account, folder,
                                              raw)
                if imapuid is None:
                    continue
                db_session.add(imapuid)
                # Flush each UID as we go; the single commit below
                # persists the whole batch atomically.
                db_session.flush()
                committed.add(imapuid)
            db_session.commit()

    log.debug('Committed new UIDs',
              new_committed_message_count=len(committed))
    # If we downloaded uids, record message velocity (#uid / latency)
    if self.state == 'initial' and len(committed):
        elapsed = datetime.utcnow() - fetch_started_at
        self._report_message_velocity(elapsed, len(committed))
    if self.is_first_message:
        self._report_first_message()
        self.is_first_message = False
    return len(committed)
def __deduplicate_message_object_creation(self, db_session, raw_messages,
                                          account):
    """
    We deduplicate messages based on g_msgid: if we've previously saved a
    Message object for this raw message, we don't create a new one. But
    we do create a new ImapUid, associate it to the message, and update
    flags and categories accordingly.

    Note: we could do this prior to downloading the actual message body,
    but that's really more complicated than it's worth. This operation is
    not super common unless you're regularly moving lots of messages to
    trash or spam, and even then the overhead of just downloading the
    body is generally not that high.

    Returns the subset of raw_messages that still need Message objects.
    """
    new_g_msgids = {msg.g_msgid for msg in raw_messages}
    existing_g_msgids = g_msgids(self.namespace_id, db_session,
                                 in_=new_g_msgids)
    brand_new_messages = [m for m in raw_messages
                          if m.g_msgid not in existing_g_msgids]
    previously_synced_messages = [m for m in raw_messages
                                  if m.g_msgid in existing_g_msgids]
    if previously_synced_messages:
        log.info('saving new uids for existing messages',
                 count=len(previously_synced_messages))
        # Fixed: the original re-fetched the account here, shadowing the
        # `account` parameter the caller already passed from this same
        # session; the redundant query has been dropped.
        folder = Folder.get(self.folder_id, db_session)
        for raw_message in previously_synced_messages:
            message_obj = db_session.query(Message).filter(
                Message.namespace_id == self.namespace_id,
                Message.g_msgid == raw_message.g_msgid).first()
            if message_obj is None:
                # The Message was deleted between the dedup query above
                # and now; fall back to creating it from scratch.
                log.warning('Message disappeared while saving new uid',
                            g_msgid=raw_message.g_msgid,
                            uid=raw_message.uid)
                brand_new_messages.append(raw_message)
                continue
            already_have_uid = (
                (raw_message.uid, self.folder_id) in
                {(u.msg_uid, u.folder_id) for u in message_obj.imapuids})
            if already_have_uid:
                log.warning('Skipping existing UID for message',
                            uid=raw_message.uid,
                            message_id=message_obj.id)
                continue
            uid = ImapUid(account=account, folder=folder,
                          msg_uid=raw_message.uid, message=message_obj)
            uid.update_flags(raw_message.flags)
            uid.update_labels(raw_message.g_labels)
            common.update_message_metadata(db_session, account,
                                           message_obj, uid.is_draft)
        db_session.commit()
    return brand_new_messages
def remove_deleted_uids(account_id, folder_id, uids):
    """
    Delete the ImapUid rows for UIDs expunged on the remote, then fix up
    (or delete) the affected messages.

    Make sure you're holding a db write lock on the account. (We don't try
    to grab the lock in here in case the caller needs to put higher-level
    functionality in the lock.)
    """
    if not uids:
        return
    deleted_uid_count = 0
    for uid in uids:
        # We do this one-uid-at-a-time because issuing many deletes within a
        # single database transaction is problematic. But loading many
        # objects into a session and then frequently calling commit() is also
        # bad, because expiring objects and checking for revisions is O(number
        # of objects in session), resulting in quadratic runtimes.
        # Performance could perhaps be additionally improved by choosing a
        # sane balance, e.g., operating on 10 or 100 uids or something at once.
        with session_scope(account_id) as db_session:
            imapuid = (db_session.query(ImapUid).filter(
                ImapUid.account_id == account_id,
                ImapUid.folder_id == folder_id,
                ImapUid.msg_uid == uid,
            ).first())
            # Already gone (e.g. removed by a concurrent sync pass).
            if imapuid is None:
                continue
            deleted_uid_count += 1
            message = imapuid.message
            db_session.delete(imapuid)
            if message is not None:
                if not message.imapuids and message.is_draft:
                    # Synchronously delete drafts.
                    thread = message.thread
                    if thread is not None:
                        thread.messages.remove(message)
                        # Thread.messages relationship is versioned i.e. extra
                        # logic gets executed on remove call.
                        # This early flush is needed so the configure_versioning
                        # logic in inbox.model.sessions can work reliably on
                        # newer versions of SQLAlchemy.
                        db_session.flush()
                    db_session.delete(message)
                    # Drop the thread too once it has no messages left.
                    if thread is not None and not thread.messages:
                        db_session.delete(thread)
                else:
                    account = Account.get(account_id, db_session)
                    update_message_metadata(db_session, account, message,
                                            message.is_draft)
                    if not message.imapuids:
                        # But don't outright delete messages. Just mark them as
                        # 'deleted' and wait for the asynchronous
                        # dangling-message-collector to delete them.
                        message.mark_for_deletion()
            # Commit per-UID; see the comment at the top of the loop.
            db_session.commit()
    log.info("Deleted expunged UIDs", count=deleted_uid_count)
def __deduplicate_message_object_creation(self, db_session, raw_messages,
                                          account):
    """
    We deduplicate messages based on g_msgid: if we've previously saved a
    Message object for this raw message, we don't create a new one. But
    we do create a new ImapUid, associate it to the message, and update
    flags and categories accordingly.

    Note: we could do this prior to downloading the actual message body,
    but that's really more complicated than it's worth. This operation is
    not super common unless you're regularly moving lots of messages to
    trash or spam, and even then the overhead of just downloading the
    body is generally not that high.

    Returns the subset of raw_messages that still need Message objects.
    """
    new_g_msgids = {msg.g_msgid for msg in raw_messages}
    existing_g_msgids = g_msgids(self.namespace_id, db_session,
                                 in_=new_g_msgids)
    brand_new_messages = [m for m in raw_messages
                          if m.g_msgid not in existing_g_msgids]
    previously_synced_messages = [m for m in raw_messages
                                  if m.g_msgid in existing_g_msgids]
    if previously_synced_messages:
        log.info('saving new uids for existing messages',
                 count=len(previously_synced_messages))
        # Fixed: the original re-fetched the account here, shadowing the
        # `account` parameter the caller already passed from this same
        # session; the redundant query has been dropped.
        folder = Folder.get(self.folder_id, db_session)
        for raw_message in previously_synced_messages:
            message_obj = db_session.query(Message).filter(
                Message.namespace_id == self.namespace_id,
                Message.g_msgid == raw_message.g_msgid).first()
            if message_obj is None:
                # The Message was deleted between the dedup query above
                # and now; fall back to creating it from scratch.
                log.warning(
                    'Message disappeared while saving new uid',
                    g_msgid=raw_message.g_msgid,
                    uid=raw_message.uid)
                brand_new_messages.append(raw_message)
                continue
            already_have_uid = (
                (raw_message.uid, self.folder_id) in
                {(u.msg_uid, u.folder_id) for u in message_obj.imapuids}
            )
            if already_have_uid:
                log.warning('Skipping existing UID for message',
                            uid=raw_message.uid,
                            message_id=message_obj.id)
                continue
            uid = ImapUid(account=account, folder=folder,
                          msg_uid=raw_message.uid, message=message_obj)
            uid.update_flags(raw_message.flags)
            uid.update_labels(raw_message.g_labels)
            common.update_message_metadata(
                db_session, account, message_obj, uid.is_draft)
        db_session.commit()
    return brand_new_messages
def remove_deleted_uids(account_id, folder_id, uids):
    """
    Delete the ImapUid rows for UIDs expunged on the remote, then fix up
    (or delete) the affected messages.

    Make sure you're holding a db write lock on the account. (We don't try
    to grab the lock in here in case the caller needs to put higher-level
    functionality in the lock.)
    """
    if not uids:
        return
    deleted_uid_count = 0
    for uid in uids:
        # We do this one-uid-at-a-time because issuing many deletes within a
        # single database transaction is problematic. But loading many
        # objects into a session and then frequently calling commit() is also
        # bad, because expiring objects and checking for revisions is O(number
        # of objects in session), resulting in quadratic runtimes.
        # Performance could perhaps be additionally improved by choosing a
        # sane balance, e.g., operating on 10 or 100 uids or something at once.
        with session_scope(account_id) as db_session:
            imapuid = (
                db_session.query(ImapUid)
                .filter(ImapUid.account_id == account_id,
                        ImapUid.folder_id == folder_id,
                        ImapUid.msg_uid == uid)
                .first()
            )
            if imapuid is None:
                continue
            deleted_uid_count += 1
            message = imapuid.message
            db_session.delete(imapuid)
            if message is not None:
                if not message.imapuids and message.is_draft:
                    # Synchronously delete drafts.
                    thread = message.thread
                    if thread is not None:
                        thread.messages.remove(message)
                        # Fixed: Thread.messages is a versioned
                        # relationship (extra logic runs on remove). The
                        # sibling revision of this function flushes early
                        # here so the configure_versioning logic in
                        # inbox.model.sessions works reliably on newer
                        # SQLAlchemy versions; this revision was missing
                        # the flush.
                        db_session.flush()
                    db_session.delete(message)
                    if thread is not None and not thread.messages:
                        db_session.delete(thread)
                else:
                    account = Account.get(account_id, db_session)
                    update_message_metadata(db_session, account, message,
                                            message.is_draft)
                    if not message.imapuids:
                        # But don't outright delete messages. Just mark them as
                        # 'deleted' and wait for the asynchronous
                        # dangling-message-collector to delete them.
                        message.mark_for_deletion()
            db_session.commit()
    log.info("Deleted expunged UIDs", count=deleted_uid_count)
def __init__(self, account_id, folder_name, folder_id, email_address,
             provider_name, syncmanager_lock):
    """
    Set up per-folder sync state and register this engine as a greenlet.

    Reads the account's namespace id and the folder's initial-sync
    timestamps from the database to seed the sync-metric flags.
    """
    # Tag log lines from this engine with account/folder context.
    bind_context(self, 'foldersyncengine', account_id, folder_id)
    self.account_id = account_id
    self.folder_name = folder_name
    self.folder_id = folder_id
    # The inbox folder is polled more aggressively than other folders.
    if self.folder_name.lower() == 'inbox':
        self.poll_frequency = INBOX_POLL_FREQUENCY
    else:
        self.poll_frequency = DEFAULT_POLL_FREQUENCY
    self.syncmanager_lock = syncmanager_lock
    # Current sync-state name; selected from state_handlers below.
    self.state = None
    self.provider_name = provider_name
    self.last_fast_refresh = None
    self.conn_pool = connection_pool(self.account_id)
    # Metric flags for sync performance
    self.is_initial_sync = False
    self.is_first_sync = False
    self.is_first_message = False
    with session_scope() as db_session:
        account = Account.get(self.account_id, db_session)
        self.namespace_id = account.namespace.id
        assert self.namespace_id is not None, "namespace_id is None"
        folder = Folder.get(self.folder_id, db_session)
        # The Folder row may not exist yet on a brand-new sync.
        if folder:
            # initial_sync_end unset => initial sync still in progress;
            # initial_sync_start unset => this folder has never synced.
            self.is_initial_sync = folder.initial_sync_end is None
            self.is_first_sync = folder.initial_sync_start is None
            self.is_first_message = self.is_first_sync
    # Maps sync-state names to the handler run for that state.
    self.state_handlers = {
        'initial': self.initial_sync,
        'initial uidinvalid': self.resync_uids,
        'poll': self.poll,
        'poll uidinvalid': self.resync_uids,
    }
    Greenlet.__init__(self)
    self.heartbeat_status = HeartbeatStatusProxy(self.account_id,
                                                 self.folder_id,
                                                 self.folder_name,
                                                 email_address,
                                                 self.provider_name)
def remove_deleted_uids(account_id, folder_id, uids, session):
    """
    Delete the ImapUid rows for UIDs expunged on the remote, then fix up
    (or delete) the affected messages.

    Make sure you're holding a db write lock on the account. (We don't try
    to grab the lock in here in case the caller needs to put higher-level
    functionality in the lock.)
    """
    if uids:
        deletes = session.query(ImapUid).filter(
            ImapUid.account_id == account_id,
            ImapUid.folder_id == folder_id,
            ImapUid.msg_uid.in_(uids)).all()
        # Remember which messages lose a UID so we can fix them up after
        # the deletes are committed.
        affected_messages = {uid.message for uid in deletes
                             if uid.message is not None}
        for uid in deletes:
            session.delete(uid)
        session.commit()
        account = Account.get(account_id, session)
        for message in affected_messages:
            if not message.imapuids and message.is_draft:
                # Synchronously delete drafts.
                thread = message.thread
                # Fixed: guard against a draft with no associated thread,
                # as sibling revisions of this function already do; the
                # unconditional attribute access could raise
                # AttributeError.
                if thread is not None:
                    thread.messages.remove(message)
                session.delete(message)
                if thread is not None and not thread.messages:
                    session.delete(thread)
            else:
                update_message_metadata(session, account, message,
                                        message.is_draft)
                if not message.imapuids:
                    # But don't outright delete messages. Just mark them as
                    # 'deleted' and wait for the asynchronous
                    # dangling-message-collector to delete them.
                    message.mark_for_deletion()
        log.info('Deleted expunged UIDs', count=len(deletes))
        session.commit()
def remove_deleted_uids(account_id, folder_id, uids, session):
    """
    Delete the ImapUid rows for UIDs expunged on the remote, then fix up
    (or delete) the affected messages.

    Make sure you're holding a db write lock on the account. (We don't try
    to grab the lock in here in case the caller needs to put higher-level
    functionality in the lock.)
    """
    if uids:
        deletes = session.query(ImapUid).filter(
            ImapUid.account_id == account_id,
            ImapUid.folder_id == folder_id,
            ImapUid.msg_uid.in_(uids)).all()
        # Messages that lost a UID and need their metadata recomputed
        # (or to be deleted outright, for drafts).
        affected_messages = {uid.message for uid in deletes
                             if uid.message is not None}
        for uid in deletes:
            session.delete(uid)
        session.commit()
        account = Account.get(account_id, session)
        for message in affected_messages:
            if not message.imapuids and message.is_draft:
                # Synchronously delete drafts.
                thread = message.thread
                # Fixed: a draft may have no thread; sibling revisions of
                # this function guard this access, and without the guard
                # the remove() call raises AttributeError on None.
                if thread is not None:
                    thread.messages.remove(message)
                session.delete(message)
                if thread is not None and not thread.messages:
                    session.delete(thread)
            else:
                update_message_metadata(session, account, message,
                                        message.is_draft)
                if not message.imapuids:
                    # But don't outright delete messages. Just mark them as
                    # 'deleted' and wait for the asynchronous
                    # dangling-message-collector to delete them.
                    message.mark_for_deletion()
        log.info('Deleted expunged UIDs', count=len(deletes))
        session.commit()