def download_and_commit_uids(self, crispin_client, uids):
    """Download the given UIDs and commit the ones that are brand new.

    The raw messages are fetched with ``crispin_client.uids(uids)``. While
    holding ``self.syncmanager_lock`` and an open database session, the
    de-duplication helper first filters out raw messages it has already
    dealt with; each remaining message is passed to ``self.create_message``
    and whatever it returns (unless ``None``) is added to the session and
    committed immediately.

    Args:
        crispin_client: Object whose ``uids(uids)`` call yields the raw
            messages to store.
        uids: The UIDs to download.

    Returns:
        int: How many objects returned by ``create_message`` were committed
        by this call. ``0`` when nothing was fetched or when the
        de-duplication step left nothing to create. Every path returns an
        int so callers can safely accumulate the result.
    """
    start = datetime.utcnow()
    raw_messages = crispin_client.uids(uids)
    if not raw_messages:
        return 0

    new_uids = set()
    with self.syncmanager_lock:
        with session_scope(self.namespace_id) as db_session:
            account = Account.get(self.account_id, db_session)
            folder = Folder.get(self.folder_id, db_session)
            raw_messages = self.__deduplicate_message_object_creation(
                db_session, raw_messages, account)
            if not raw_messages:
                # Nothing brand new is left; leaving here also skips the
                # logging and metric reporting below.
                return 0

            for msg in raw_messages:
                uid = self.create_message(db_session, account, folder, msg)
                if uid is not None:
                    db_session.add(uid)
                    # Commit per message so earlier messages stay persisted
                    # even if a later one fails.
                    db_session.commit()
                    new_uids.add(uid)

    log.info('Committed new UIDs',
             new_committed_message_count=len(new_uids))
    # If we downloaded uids, record message velocity (#uid / latency)
    if self.state == "initial" and new_uids:
        self._report_message_velocity(datetime.utcnow() - start,
                                      len(new_uids))

    if self.is_first_message:
        self._report_first_message()
        self.is_first_message = False

    # NOTE(review): new_uids holds the objects returned by create_message
    # (the ones added to the session), not the raw UID values passed in --
    # confirm that is what consumers of saved_uids expect.
    self.saved_uids.update(new_uids)
    return len(new_uids)
def download_and_commit_uids(self, crispin_client, uids):
    """Download the given UIDs and commit the ones that are brand new.

    Raw messages come from ``crispin_client.uids(uids)``. Under
    ``self.syncmanager_lock`` and inside a ``session_scope()`` session, the
    de-duplication helper narrows the list down first; every message that
    remains is handed to ``self.create_message`` and a non-``None`` result
    is added to the session and committed straight away.

    Args:
        crispin_client: Object whose ``uids(uids)`` call yields the raw
            messages to store.
        uids: The UIDs to download.

    Returns:
        int: Number of objects returned by ``create_message`` that this
        call committed; ``0`` when nothing was fetched or nothing was left
        after de-duplication. All paths return an int so the result can be
        summed by callers.
    """
    start = datetime.utcnow()
    raw_messages = crispin_client.uids(uids)
    if not raw_messages:
        return 0

    new_uids = set()
    with self.syncmanager_lock:
        with session_scope() as db_session:
            account = Account.get(self.account_id, db_session)
            folder = Folder.get(self.folder_id, db_session)
            raw_messages = self.__deduplicate_message_object_creation(
                db_session, raw_messages, account)
            if not raw_messages:
                # Everything was handled by de-duplication; returning here
                # bypasses the logging and metrics further down.
                return 0

            for msg in raw_messages:
                uid = self.create_message(db_session, account, folder, msg)
                if uid is not None:
                    db_session.add(uid)
                    # One commit per message keeps already-stored messages
                    # durable if a later message raises.
                    db_session.commit()
                    new_uids.add(uid)

    log.info('Committed new UIDs',
             new_committed_message_count=len(new_uids))
    # If we downloaded uids, record message velocity (#uid / latency)
    if self.state == "initial" and new_uids:
        self._report_message_velocity(datetime.utcnow() - start,
                                      len(new_uids))

    if self.is_first_message:
        self._report_first_message()
        self.is_first_message = False

    # NOTE(review): the set contains the objects create_message returned,
    # not the raw UID values from `uids` -- verify saved_uids consumers
    # are written for that.
    self.saved_uids.update(new_uids)
    return len(new_uids)
def download_and_commit_uids(self, crispin_client, uids):
    """Fetch ``uids`` through ``crispin_client`` and store the results.

    Each raw message is passed to ``self.create_message``; non-``None``
    results are added to the session and flushed one by one, and a single
    commit is issued once the whole batch has been processed. The work is
    done while holding ``self.syncmanager_lock``.

    Args:
        crispin_client: Object whose ``uids(uids)`` call yields the raw
            messages to store.
        uids: The UIDs to download.

    Returns:
        int: Number of objects created and committed (``0`` if the fetch
        came back empty).
    """
    started_at = datetime.utcnow()
    fetched = crispin_client.uids(uids)
    if not fetched:
        return 0

    created_uids = set()
    with self.syncmanager_lock, \
            session_scope(self.namespace_id) as db_session:
        account = Account.get(self.account_id, db_session)
        folder = Folder.get(self.folder_id, db_session)
        for raw in fetched:
            created = self.create_message(db_session, account, folder, raw)
            if created is None:
                continue
            db_session.add(created)
            db_session.flush()
            created_uids.add(created)
        # One commit for the whole batch.
        db_session.commit()

    log.debug('Committed new UIDs',
              new_committed_message_count=len(created_uids))

    # Message velocity (#uid / latency) is only reported when something
    # was actually downloaded during the 'initial' state.
    if self.state == 'initial' and len(created_uids):
        elapsed = datetime.utcnow() - started_at
        self._report_message_velocity(elapsed, len(created_uids))

    if self.is_first_message:
        self._report_first_message()
        self.is_first_message = False

    return len(created_uids)
def __deduplicate_message_object_creation(self, db_session, raw_messages,
                                          account):
    """
    Separate raw messages that still need a Message object from those
    whose g_msgid is already known, and attach a new ImapUid to the latter.

    Deduplication is keyed on g_msgid: when a Message already exists for a
    raw message no second Message is created. Instead a new ImapUid is
    built for it, linked to the existing Message, and its flags and labels
    are updated from the raw message.

    Note: this could be done before the message body is downloaded, but
    that is more complicated than it is worth. The situation is uncommon
    unless lots of messages are regularly moved to trash or spam, and even
    then downloading the body is generally cheap.

    Returns the list of raw messages for which a Message must still be
    created. That includes messages reported as known whose Message row
    could not be found when it was looked up.
    """
    candidate_g_msgids = {raw.g_msgid for raw in raw_messages}
    known_g_msgids = g_msgids(self.namespace_id, db_session,
                              in_=candidate_g_msgids)

    # Single pass: keep the original relative order inside each bucket.
    brand_new_messages = []
    already_synced = []
    for raw in raw_messages:
        if raw.g_msgid in known_g_msgids:
            already_synced.append(raw)
        else:
            brand_new_messages.append(raw)

    if not already_synced:
        return brand_new_messages

    log.info('saving new uids for existing messages',
             count=len(already_synced))
    # The account is looked up again here, replacing the value passed in.
    account = Account.get(self.account_id, db_session)
    folder = Folder.get(self.folder_id, db_session)

    for raw in already_synced:
        lookup = db_session.query(Message).filter(
            Message.namespace_id == self.namespace_id,
            Message.g_msgid == raw.g_msgid)
        message_obj = lookup.first()
        if message_obj is None:
            # The row went away since the g_msgid lookup; treat the
            # message as brand new.
            log.warning('Message disappeared while saving new uid',
                        g_msgid=raw.g_msgid,
                        uid=raw.uid)
            brand_new_messages.append(raw)
            continue

        stored_pairs = {(existing.msg_uid, existing.folder_id)
                        for existing in message_obj.imapuids}
        if (raw.uid, self.folder_id) in stored_pairs:
            log.warning('Skipping existing UID for message',
                        uid=raw.uid, message_id=message_obj.id)
            continue

        uid = ImapUid(account=account, folder=folder,
                      msg_uid=raw.uid, message=message_obj)
        uid.update_flags(raw.flags)
        uid.update_labels(raw.g_labels)
        common.update_message_metadata(db_session, account,
                                       message_obj, uid.is_draft)
        db_session.commit()

    return brand_new_messages
def __deduplicate_message_object_creation(self, db_session, raw_messages,
                                          account):
    """
    Filter out raw messages that already have a stored Message.

    Messages are deduplicated by g_msgid. For a raw message whose Message
    was saved earlier, no new Message is created; a new ImapUid is created
    instead, associated with the existing Message, and given the raw
    message's flags and labels.

    Note: doing this before downloading the message body would be
    possible, but is more complicated than it is worth. It only happens
    often when many messages are regularly moved to trash or spam, and the
    cost of downloading the body is generally low anyway.

    Returns the raw messages that still need a Message object, including
    any whose previously reported Message could no longer be found.
    """
    stored_g_msgids = g_msgids(
        self.namespace_id, db_session,
        in_={msg.g_msgid for msg in raw_messages})

    def _is_stored(raw):
        return raw.g_msgid in stored_g_msgids

    brand_new_messages = [m for m in raw_messages if not _is_stored(m)]
    duplicates = [m for m in raw_messages if _is_stored(m)]

    if duplicates:
        log.info('saving new uids for existing messages',
                 count=len(duplicates))
        # Re-read the account in this session; the argument is replaced.
        account = Account.get(self.account_id, db_session)
        folder = Folder.get(self.folder_id, db_session)

        for raw in duplicates:
            message_obj = db_session.query(Message).filter(
                Message.namespace_id == self.namespace_id,
                Message.g_msgid == raw.g_msgid).first()

            if message_obj is None:
                # No row after all: hand the message back to the caller
                # so that a Message gets created for it.
                log.warning(
                    'Message disappeared while saving new uid',
                    g_msgid=raw.g_msgid,
                    uid=raw.uid)
                brand_new_messages.append(raw)
            elif (raw.uid, self.folder_id) in {
                    (u.msg_uid, u.folder_id) for u in message_obj.imapuids}:
                log.warning('Skipping existing UID for message',
                            uid=raw.uid, message_id=message_obj.id)
            else:
                uid = ImapUid(account=account, folder=folder,
                              msg_uid=raw.uid, message=message_obj)
                uid.update_flags(raw.flags)
                uid.update_labels(raw.g_labels)
                common.update_message_metadata(
                    db_session, account, message_obj, uid.is_draft)
                db_session.commit()

    return brand_new_messages
def __init__(self, account_id, folder_name, folder_id, email_address,
             provider_name, syncmanager_lock):
    """Initialise the sync engine for one folder of one account.

    Stores the identifiers, picks the poll frequency (the folder named
    'inbox', case-insensitively, gets INBOX_POLL_FREQUENCY, everything
    else DEFAULT_POLL_FREQUENCY), reads the namespace id and the folder's
    sync timestamps from the database, registers the state handlers and
    creates the heartbeat proxy.

    Args:
        account_id: Id of the account being synced.
        folder_name: Name of the folder being synced.
        folder_id: Id of the folder being synced.
        email_address: Passed through to HeartbeatStatusProxy.
        provider_name: Stored on the instance and passed through to
            HeartbeatStatusProxy.
        syncmanager_lock: Lock stored on the instance as
            ``self.syncmanager_lock``.

    Raises:
        AssertionError: If the account's namespace id is None.
    """
    bind_context(self, 'foldersyncengine', account_id, folder_id)

    self.account_id = account_id
    self.folder_name = folder_name
    self.folder_id = folder_id

    is_inbox = self.folder_name.lower() == 'inbox'
    self.poll_frequency = (INBOX_POLL_FREQUENCY if is_inbox
                           else DEFAULT_POLL_FREQUENCY)

    self.syncmanager_lock = syncmanager_lock
    self.state = None
    self.provider_name = provider_name
    self.last_fast_refresh = None
    self.conn_pool = connection_pool(self.account_id)

    # Metric flags for sync performance; they stay False when the folder
    # row cannot be found below.
    self.is_initial_sync = self.is_first_sync = self.is_first_message = False

    with session_scope() as db_session:
        account = Account.get(self.account_id, db_session)
        self.namespace_id = account.namespace.id
        assert self.namespace_id is not None, "namespace_id is None"

        folder = Folder.get(self.folder_id, db_session)
        if folder:
            never_started = folder.initial_sync_start is None
            self.is_initial_sync = folder.initial_sync_end is None
            self.is_first_sync = never_started
            self.is_first_message = never_started

    # Both "uidinvalid" states are handled by the same resync routine.
    self.state_handlers = {
        'initial': self.initial_sync,
        'initial uidinvalid': self.resync_uids,
        'poll': self.poll,
        'poll uidinvalid': self.resync_uids,
    }

    Greenlet.__init__(self)

    self.heartbeat_status = HeartbeatStatusProxy(
        self.account_id, self.folder_id, self.folder_name,
        email_address, self.provider_name)
def __init__(self, account_id, folder_name, folder_id, email_address,
             provider_name, syncmanager_lock):
    """Build a folder sync engine bound to ``account_id``/``folder_id``.

    Besides storing its arguments, the constructor:
      * chooses ``poll_frequency`` -- INBOX_POLL_FREQUENCY when the folder
        name lower-cases to 'inbox', DEFAULT_POLL_FREQUENCY otherwise;
      * opens a connection pool for the account;
      * loads the account's namespace id and derives the sync metric flags
        from the folder's ``initial_sync_start`` / ``initial_sync_end``;
      * maps each sync state name to its handler method;
      * initialises the Greenlet base and the heartbeat status proxy.

    Args:
        account_id: Id of the account being synced.
        folder_name: Name of the folder being synced.
        folder_id: Id of the folder being synced.
        email_address: Forwarded to HeartbeatStatusProxy.
        provider_name: Kept on the instance and forwarded to
            HeartbeatStatusProxy.
        syncmanager_lock: Lock kept on the instance as
            ``self.syncmanager_lock``.

    Raises:
        AssertionError: If the loaded namespace id is None.
    """
    bind_context(self, 'foldersyncengine', account_id, folder_id)
    self.account_id = account_id
    self.folder_name = folder_name
    self.folder_id = folder_id

    self.poll_frequency = (
        INBOX_POLL_FREQUENCY
        if self.folder_name.lower() == 'inbox'
        else DEFAULT_POLL_FREQUENCY
    )

    self.syncmanager_lock = syncmanager_lock
    self.state = None
    self.provider_name = provider_name
    self.last_fast_refresh = None
    self.conn_pool = connection_pool(self.account_id)

    # Metric flags for sync performance. These defaults are kept when no
    # folder row is found.
    self.is_initial_sync = False
    self.is_first_sync = False
    self.is_first_message = False

    with session_scope() as db_session:
        account = Account.get(self.account_id, db_session)
        namespace_id = account.namespace.id
        self.namespace_id = namespace_id
        assert self.namespace_id is not None, "namespace_id is None"

        folder = Folder.get(self.folder_id, db_session)
        if folder:
            self.is_initial_sync = folder.initial_sync_end is None
            self.is_first_sync = folder.initial_sync_start is None
            # The first-message flag starts out equal to the first-sync
            # flag.
            self.is_first_message = self.is_first_sync

    handlers = {}
    handlers['initial'] = self.initial_sync
    handlers['initial uidinvalid'] = self.resync_uids
    handlers['poll'] = self.poll
    handlers['poll uidinvalid'] = self.resync_uids
    self.state_handlers = handlers

    Greenlet.__init__(self)
    self.heartbeat_status = HeartbeatStatusProxy(self.account_id,
                                                 self.folder_id,
                                                 self.folder_name,
                                                 email_address,
                                                 self.provider_name)