Example #1
0
    def download_and_commit_uids(self, crispin_client, uids):
        """Download the given UIDs from the remote and persist them.

        Fetches raw message data via ``crispin_client``, deduplicates
        against previously synced messages, creates and commits ImapUid
        rows one at a time, and records sync-performance metrics.
        Returns None; newly committed uids are accumulated on
        ``self.saved_uids``.
        """
        start = datetime.utcnow()
        raw_messages = crispin_client.uids(uids)
        if not raw_messages:
            return
        new_uids = set()
        with self.syncmanager_lock:
            with session_scope() as db_session:
                account = Account.get(self.account_id, db_session)
                folder = Folder.get(self.folder_id, db_session)
                raw_messages = self.__deduplicate_message_object_creation(
                    db_session, raw_messages, account)
                # BUGFIX: this path previously returned 0 while every other
                # path returns None; keep the return type consistent (both
                # values are falsy to callers).
                if not raw_messages:
                    return

                for msg in raw_messages:
                    uid = self.create_message(db_session, account, folder,
                                              msg)
                    if uid is not None:
                        db_session.add(uid)
                        # Commit per-uid so progress survives a crash
                        # mid-batch.
                        db_session.commit()
                        new_uids.add(uid)

        log.info('Committed new UIDs',
                 new_committed_message_count=len(new_uids))
        # If we downloaded uids, record message velocity (#uid / latency)
        if self.state == "initial" and len(new_uids):
            self._report_message_velocity(datetime.utcnow() - start,
                                          len(new_uids))

        if self.is_first_message:
            self._report_first_message()
            self.is_first_message = False

        self.saved_uids.update(new_uids)
Example #2
0
    def download_and_commit_uids(self, crispin_client, uids):
        """Fetch the given UIDs from the server and commit them to the DB.

        Returns the number of newly committed ImapUid rows (0 when the
        server returned no messages for the requested uids).
        """
        started_at = datetime.utcnow()
        fetched = crispin_client.uids(uids)
        if not fetched:
            return 0

        committed = set()
        with self.syncmanager_lock:
            with session_scope(self.namespace_id) as db_session:
                account = Account.get(self.account_id, db_session)
                folder = Folder.get(self.folder_id, db_session)
                for raw_msg in fetched:
                    imapuid = self.create_message(db_session, account,
                                                  folder, raw_msg)
                    if imapuid is None:
                        continue
                    db_session.add(imapuid)
                    # Flush per uid so constraint errors surface early;
                    # the single commit below finalizes the whole batch.
                    db_session.flush()
                    committed.add(imapuid)
                db_session.commit()

        log.debug('Committed new UIDs', new_committed_message_count=len(committed))
        # If we downloaded uids, record message velocity (#uid / latency)
        if self.state == 'initial' and len(committed):
            self._report_message_velocity(datetime.utcnow() - started_at,
                                          len(committed))
        if self.is_first_message:
            self._report_first_message()
            self.is_first_message = False

        return len(committed)
Example #3
0
def update_metadata(account_id, folder_id, new_flags, session):
    """
    Update flags and labels (the only metadata that can change).

    Make sure you're holding a db write lock on the account. (We don't try
    to grab the lock in here in case the caller needs to put higher-level
    functionality in the lock.)

    """
    if not new_flags:
        return

    account = Account.get(account_id, session)
    change_count = 0
    matching_uids = session.query(ImapUid).filter(
        ImapUid.account_id == account_id,
        ImapUid.msg_uid.in_(new_flags.keys()),
        ImapUid.folder_id == folder_id)
    for uid_record in matching_uids:
        entry = new_flags[uid_record.msg_uid]
        labels = getattr(entry, 'labels', None)

        # TODO(emfree) refactor so this is only ever relevant for Gmail.
        dirty = uid_record.update_flags(entry.flags)
        if labels is not None:
            uid_record.update_labels(labels)
            dirty = True

        if dirty:
            change_count += 1
            update_message_metadata(session, account, uid_record.message,
                                    uid_record.is_draft)
            # Commit per changed uid to keep transactions small.
            session.commit()
    log.info('Updated UID metadata', changed=change_count,
             out_of=len(new_flags))
Example #4
0
    def download_and_commit_uids(self, crispin_client, uids):
        """Download the given UIDs from the remote and persist them.

        Fetches raw message data via ``crispin_client``, deduplicates
        against previously synced messages, creates and commits ImapUid
        rows one at a time, and records sync-performance metrics.
        Returns None; newly committed uids are accumulated on
        ``self.saved_uids``.
        """
        start = datetime.utcnow()
        raw_messages = crispin_client.uids(uids)
        if not raw_messages:
            return
        new_uids = set()
        with self.syncmanager_lock:
            with session_scope(self.namespace_id) as db_session:
                account = Account.get(self.account_id, db_session)
                folder = Folder.get(self.folder_id, db_session)
                raw_messages = self.__deduplicate_message_object_creation(
                    db_session, raw_messages, account)
                # BUGFIX: this path previously returned 0 while every other
                # path returns None; keep the return type consistent (both
                # values are falsy to callers).
                if not raw_messages:
                    return

                for msg in raw_messages:
                    uid = self.create_message(db_session, account, folder, msg)
                    if uid is not None:
                        db_session.add(uid)
                        # Commit per-uid so progress survives a crash
                        # mid-batch.
                        db_session.commit()
                        new_uids.add(uid)

        log.info('Committed new UIDs',
                 new_committed_message_count=len(new_uids))
        # If we downloaded uids, record message velocity (#uid / latency)
        if self.state == "initial" and len(new_uids):
            self._report_message_velocity(datetime.utcnow() - start,
                                          len(new_uids))

        if self.is_first_message:
            self._report_first_message()
            self.is_first_message = False

        self.saved_uids.update(new_uids)
Example #5
0
def update_metadata(account_id, folder_id, folder_role, new_flags, session):
    """
    Update flags and labels (the only metadata that can change).

    Make sure you're holding a db write lock on the account. (We don't try
    to grab the lock in here in case the caller needs to put higher-level
    functionality in the lock.)

    """
    if not new_flags:
        return

    account = Account.get(account_id, session)
    change_count = 0
    matching_uids = session.query(ImapUid).filter(
        ImapUid.account_id == account_id,
        ImapUid.msg_uid.in_(new_flags.keys()),
        ImapUid.folder_id == folder_id)
    for uid_record in matching_uids:
        entry = new_flags[uid_record.msg_uid]
        labels = getattr(entry, 'labels', None)

        # TODO(emfree) refactor so this is only ever relevant for Gmail.
        dirty = uid_record.update_flags(entry.flags)
        if labels is not None:
            uid_record.update_labels(labels)
            dirty = True

        if dirty:
            change_count += 1
            # Only treat the message as a draft when this folder is one
            # where drafts can actually live.
            is_draft = uid_record.is_draft and folder_role in ('drafts',
                                                               'all')
            update_message_metadata(session, account, uid_record.message,
                                    is_draft)
            session.commit()
    log.info('Updated UID metadata', changed=change_count,
             out_of=len(new_flags))
Example #6
0
def add_fake_account(db_session, email_address='*****@*****.**'):
    """Create, persist, and return a throwaway Account for tests."""
    from inbox.models import Account, Namespace
    acct = Account(email_address=email_address, namespace=Namespace())
    db_session.add(acct)
    db_session.commit()
    return acct
Example #7
0
    def __deduplicate_message_object_creation(self, db_session, raw_messages,
                                              account):
        """
        We deduplicate messages based on g_msgid: if we've previously saved a
        Message object for this raw message, we don't create a new one. But we
        do create a new ImapUid, associate it to the message, and update flags
        and categories accordingly.
        Note: we could do this prior to downloading the actual message
        body, but that's really more complicated than it's worth. This
        operation is not super common unless you're regularly moving lots
        of messages to trash or spam, and even then the overhead of just
        downloading the body is generally not that high.

        Returns the subset of raw_messages that still need Message objects
        created.
        """
        new_g_msgids = {msg.g_msgid for msg in raw_messages}
        existing_g_msgids = g_msgids(self.namespace_id,
                                     db_session,
                                     in_=new_g_msgids)
        brand_new_messages = [
            m for m in raw_messages if m.g_msgid not in existing_g_msgids
        ]
        previously_synced_messages = [
            m for m in raw_messages if m.g_msgid in existing_g_msgids
        ]
        if previously_synced_messages:
            log.info('saving new uids for existing messages',
                     count=len(previously_synced_messages))
            # BUGFIX: use the `account` passed in by the caller instead of
            # redundantly re-fetching it here; the old code shadowed the
            # parameter with an identical object from the same session.
            folder = Folder.get(self.folder_id, db_session)
            for raw_message in previously_synced_messages:
                message_obj = db_session.query(Message).filter(
                    Message.namespace_id == self.namespace_id,
                    Message.g_msgid == raw_message.g_msgid).first()
                if message_obj is None:
                    # Deleted between the g_msgids() lookup and this query;
                    # treat it as brand new.
                    log.warning('Message disappeared while saving new uid',
                                g_msgid=raw_message.g_msgid,
                                uid=raw_message.uid)
                    brand_new_messages.append(raw_message)
                    continue
                already_have_uid = ((raw_message.uid, self.folder_id)
                                    in {(u.msg_uid, u.folder_id)
                                        for u in message_obj.imapuids})
                if already_have_uid:
                    log.warning('Skipping existing UID for message',
                                uid=raw_message.uid,
                                message_id=message_obj.id)
                    continue
                uid = ImapUid(account=account,
                              folder=folder,
                              msg_uid=raw_message.uid,
                              message=message_obj)
                uid.update_flags(raw_message.flags)
                uid.update_labels(raw_message.g_labels)
                common.update_message_metadata(db_session, account,
                                               message_obj, uid.is_draft)
                db_session.commit()

        return brand_new_messages
Example #8
0
def remove_deleted_uids(account_id, folder_id, uids):
    """
    Remove ImapUid rows for uids the server has expunged, cleaning up
    orphaned drafts and marking dangling messages for deletion.

    Make sure you're holding a db write lock on the account. (We don't try
    to grab the lock in here in case the caller needs to put higher-level
    functionality in the lock.)

    """
    if not uids:
        return
    deleted_uid_count = 0
    for uid in uids:
        # We do this one-uid-at-a-time because issuing many deletes within a
        # single database transaction is problematic. But loading many
        # objects into a session and then frequently calling commit() is also
        # bad, because expiring objects and checking for revisions is O(number
        # of objects in session), resulting in quadratic runtimes.
        # Performance could perhaps be additionally improved by choosing a
        # sane balance, e.g., operating on 10 or 100 uids or something at once.
        with session_scope(account_id) as db_session:
            imapuid = (db_session.query(ImapUid).filter(
                ImapUid.account_id == account_id,
                ImapUid.folder_id == folder_id,
                ImapUid.msg_uid == uid,
            ).first())
            # Already gone (e.g. removed by a concurrent sync); skip.
            if imapuid is None:
                continue
            deleted_uid_count += 1
            message = imapuid.message

            db_session.delete(imapuid)

            if message is not None:
                if not message.imapuids and message.is_draft:
                    # Synchronously delete drafts.
                    thread = message.thread
                    if thread is not None:
                        thread.messages.remove(message)
                        # Thread.messages relationship is versioned i.e. extra
                        # logic gets executed on remove call.
                        # This early flush is needed so the configure_versioning logic
                        # in inbox.model.sessions can work reliably on newer versions of
                        # SQLAlchemy.
                        db_session.flush()
                    db_session.delete(message)
                    # Drop the thread too once its last message is gone.
                    if thread is not None and not thread.messages:
                        db_session.delete(thread)
                else:
                    account = Account.get(account_id, db_session)
                    update_message_metadata(db_session, account, message,
                                            message.is_draft)
                    if not message.imapuids:
                        # But don't outright delete messages. Just mark them as
                        # 'deleted' and wait for the asynchronous
                        # dangling-message-collector to delete them.
                        message.mark_for_deletion()
            db_session.commit()
    log.info("Deleted expunged UIDs", count=deleted_uid_count)
Example #9
0
    def __deduplicate_message_object_creation(self, db_session, raw_messages,
                                              account):
        """
        We deduplicate messages based on g_msgid: if we've previously saved a
        Message object for this raw message, we don't create a new one. But we
        do create a new ImapUid, associate it to the message, and update flags
        and categories accordingly.
        Note: we could do this prior to downloading the actual message
        body, but that's really more complicated than it's worth. This
        operation is not super common unless you're regularly moving lots
        of messages to trash or spam, and even then the overhead of just
        downloading the body is generally not that high.

        Returns the subset of raw_messages that still need Message objects
        created.
        """
        new_g_msgids = {msg.g_msgid for msg in raw_messages}
        existing_g_msgids = g_msgids(self.namespace_id, db_session,
                                     in_=new_g_msgids)
        brand_new_messages = [m for m in raw_messages if m.g_msgid not in
                              existing_g_msgids]
        previously_synced_messages = [m for m in raw_messages if m.g_msgid in
                                      existing_g_msgids]
        if previously_synced_messages:
            log.info('saving new uids for existing messages',
                     count=len(previously_synced_messages))
            # BUGFIX: use the `account` passed in by the caller instead of
            # redundantly re-fetching it here; the old code shadowed the
            # parameter with an identical object from the same session.
            folder = Folder.get(self.folder_id, db_session)
            for raw_message in previously_synced_messages:
                message_obj = db_session.query(Message).filter(
                    Message.namespace_id == self.namespace_id,
                    Message.g_msgid == raw_message.g_msgid).first()
                if message_obj is None:
                    # Deleted between the g_msgids() lookup and this query;
                    # treat it as brand new.
                    log.warning(
                        'Message disappeared while saving new uid',
                        g_msgid=raw_message.g_msgid,
                        uid=raw_message.uid)
                    brand_new_messages.append(raw_message)
                    continue
                already_have_uid = (
                    (raw_message.uid, self.folder_id) in
                    {(u.msg_uid, u.folder_id) for u in message_obj.imapuids}
                )
                if already_have_uid:
                    log.warning('Skipping existing UID for message',
                                uid=raw_message.uid, message_id=message_obj.id)
                    continue
                uid = ImapUid(account=account,
                              folder=folder,
                              msg_uid=raw_message.uid,
                              message=message_obj)
                uid.update_flags(raw_message.flags)
                uid.update_labels(raw_message.g_labels)
                common.update_message_metadata(
                    db_session, account, message_obj, uid.is_draft)
                db_session.commit()

        return brand_new_messages
def test_stealing_limited_by_host(db, config):
    """Accounts on a shard whose SYNC_HOSTS excludes this host aren't stolen."""
    this_host = platform.node()
    shards = config['DATABASE_HOSTS'][0]['SHARDS']
    shards[0]['SYNC_HOSTS'] = [this_host]
    shards[1]['SYNC_HOSTS'] = ['otherhost']
    purge_other_accounts()
    sync_service = SyncService(cpu_id=0, total_cpus=1)
    # Seed one unsynced account on each shard.
    for shard_id in (0, 1):
        with session_scope_by_shard_id(shard_id) as db_session:
            account = Account()
            account.namespace = Namespace()
            db_session.add(account)
            db_session.commit()

    sync_service.accounts_to_start()
    with session_scope_by_shard_id(0) as db_session:
        claimed = db_session.query(Account).first()
        assert claimed.sync_host == this_host
    with session_scope_by_shard_id(1) as db_session:
        unclaimed = db_session.query(Account).first()
        assert unclaimed.sync_host is None
def test_stealing_limited_by_host(db, config):
    """A shard restricted to a different sync host is left untouched."""
    local_host = platform.node()
    config['DATABASE_HOSTS'][0]['SHARDS'][0]['SYNC_HOSTS'] = [local_host]
    config['DATABASE_HOSTS'][0]['SHARDS'][1]['SYNC_HOSTS'] = ['otherhost']
    purge_other_accounts()
    service = SyncService(cpu_id=0, total_cpus=1)
    for shard in (0, 1):
        with session_scope_by_shard_id(shard) as db_session:
            new_account = Account()
            new_account.namespace = Namespace()
            db_session.add(new_account)
            db_session.commit()

    service.accounts_to_start()
    # Shard 0 allows this host, so its account gets claimed...
    with session_scope_by_shard_id(0) as db_session:
        assert db_session.query(Account).first().sync_host == local_host
    # ...while shard 1 only allows 'otherhost' and stays unclaimed.
    with session_scope_by_shard_id(1) as db_session:
        assert db_session.query(Account).first().sync_host is None
Example #12
0
def test_accounts_started_on_all_shards(db, default_account, config):
    """With account stealing on, the service claims accounts on every shard."""
    config['SYNC_STEAL_ACCOUNTS'] = True
    purge_other_accounts(default_account)
    default_account.sync_host = None
    db.session.commit()
    service = SyncService(cpu_id=0, total_cpus=1)
    service.host = 'localhost'
    expected_ids = {default_account.id}
    # Seed one extra unsynced account on each shard.
    for shard_id in (0, 1):
        with session_scope_by_shard_id(shard_id) as db_session:
            account = Account()
            account.namespace = Namespace()
            db_session.add(account)
            db_session.commit()
            expected_ids.add(account.id)

    assert len(expected_ids) == 3
    assert set(service.accounts_to_start()) == expected_ids
    for account_id in expected_ids:
        with session_scope(account_id) as db_session:
            claimed = db_session.query(Account).get(account_id)
            assert claimed.sync_host == 'localhost'
def test_accounts_started_on_all_shards(db, default_account, config):
    """Stealing enabled: every shard's accounts end up assigned to this host."""
    config['SYNC_STEAL_ACCOUNTS'] = True
    purge_other_accounts(default_account)
    default_account.sync_host = None
    db.session.commit()
    sync_service = SyncService(cpu_id=0, total_cpus=1)
    sync_service.host = 'localhost'
    all_ids = {default_account.id}
    for shard in (0, 1):
        with session_scope_by_shard_id(shard) as db_session:
            acct = Account()
            acct.namespace = Namespace()
            db_session.add(acct)
            db_session.commit()
            all_ids.add(acct.id)

    assert len(all_ids) == 3
    assert set(sync_service.accounts_to_start()) == all_ids
    # Every account, regardless of shard, should now be claimed.
    for acct_id in all_ids:
        with session_scope(acct_id) as db_session:
            assert (db_session.query(Account).get(acct_id).sync_host ==
                    'localhost')
Example #14
0
def remove_deleted_uids(account_id, folder_id, uids):
    """
    Remove ImapUid rows for uids the server has expunged, cleaning up
    orphaned drafts and marking dangling messages for deletion.

    Make sure you're holding a db write lock on the account. (We don't try
    to grab the lock in here in case the caller needs to put higher-level
    functionality in the lock.)

    """
    if not uids:
        return
    deleted_uid_count = 0
    for uid in uids:
        # We do this one-uid-at-a-time because issuing many deletes within a
        # single database transaction is problematic. But loading many
        # objects into a session and then frequently calling commit() is also
        # bad, because expiring objects and checking for revisions is O(number
        # of objects in session), resulting in quadratic runtimes.
        # Performance could perhaps be additionally improved by choosing a
        # sane balance, e.g., operating on 10 or 100 uids or something at once.
        with session_scope(account_id) as db_session:
            imapuid = (
                db_session.query(ImapUid)
                .filter(ImapUid.account_id == account_id, ImapUid.folder_id == folder_id, ImapUid.msg_uid == uid)
                .first()
            )
            if imapuid is None:
                continue
            deleted_uid_count += 1
            message = imapuid.message

            db_session.delete(imapuid)

            if message is not None:
                if not message.imapuids and message.is_draft:
                    # Synchronously delete drafts.
                    thread = message.thread
                    if thread is not None:
                        thread.messages.remove(message)
                        # BUGFIX: Thread.messages is a versioned
                        # relationship, i.e. extra logic runs on remove.
                        # Flush early so the configure_versioning logic in
                        # inbox.model.sessions works reliably on newer
                        # SQLAlchemy versions (matches the other variant of
                        # this function).
                        db_session.flush()
                    db_session.delete(message)
                    if thread is not None and not thread.messages:
                        db_session.delete(thread)
                else:
                    account = Account.get(account_id, db_session)
                    update_message_metadata(db_session, account, message, message.is_draft)
                    if not message.imapuids:
                        # But don't outright delete messages. Just mark them as
                        # 'deleted' and wait for the asynchronous
                        # dangling-message-collector to delete them.
                        message.mark_for_deletion()
            db_session.commit()
    log.info("Deleted expunged UIDs", count=deleted_uid_count)
Example #15
0
    def __init__(self, account_id, folder_name, folder_id, email_address,
                 provider_name, syncmanager_lock):
        """Set up sync state for one folder of one account.

        Looks up the account's namespace and the folder's prior sync
        progress from the database, wires up the state-machine handlers,
        and initializes heartbeat reporting.
        """
        bind_context(self, 'foldersyncengine', account_id, folder_id)
        self.account_id = account_id
        self.folder_name = folder_name
        self.folder_id = folder_id
        # The inbox folder gets a dedicated (presumably faster) poll rate.
        if self.folder_name.lower() == 'inbox':
            self.poll_frequency = INBOX_POLL_FREQUENCY
        else:
            self.poll_frequency = DEFAULT_POLL_FREQUENCY
        self.syncmanager_lock = syncmanager_lock
        self.state = None
        self.provider_name = provider_name
        self.last_fast_refresh = None
        self.conn_pool = connection_pool(self.account_id)

        # Metric flags for sync performance
        self.is_initial_sync = False
        self.is_first_sync = False
        self.is_first_message = False

        with session_scope() as db_session:
            account = Account.get(self.account_id, db_session)
            self.namespace_id = account.namespace.id
            assert self.namespace_id is not None, "namespace_id is None"

            # Derive sync-progress flags from a previous run: no
            # initial_sync_end means the initial sync never finished; no
            # initial_sync_start means it never even began.
            folder = Folder.get(self.folder_id, db_session)
            if folder:
                self.is_initial_sync = folder.initial_sync_end is None
                self.is_first_sync = folder.initial_sync_start is None
                self.is_first_message = self.is_first_sync

        # Maps state-machine state names to the methods that run them.
        self.state_handlers = {
            'initial': self.initial_sync,
            'initial uidinvalid': self.resync_uids,
            'poll': self.poll,
            'poll uidinvalid': self.resync_uids,
        }

        Greenlet.__init__(self)

        self.heartbeat_status = HeartbeatStatusProxy(self.account_id,
                                                     self.folder_id,
                                                     self.folder_name,
                                                     email_address,
                                                     self.provider_name)
Example #16
0
    def __init__(self, account_id, folder_name, folder_id, email_address,
                 provider_name, syncmanager_lock):
        """Initialize per-folder sync state.

        Reads the account's namespace id and the folder's recorded sync
        progress from the database, registers the state-machine handler
        table, and starts heartbeat status reporting.
        """
        bind_context(self, 'foldersyncengine', account_id, folder_id)
        self.account_id = account_id
        self.folder_name = folder_name
        self.folder_id = folder_id
        # The inbox folder is polled at its own configured frequency.
        if self.folder_name.lower() == 'inbox':
            self.poll_frequency = INBOX_POLL_FREQUENCY
        else:
            self.poll_frequency = DEFAULT_POLL_FREQUENCY
        self.syncmanager_lock = syncmanager_lock
        self.state = None
        self.provider_name = provider_name
        self.last_fast_refresh = None
        self.conn_pool = connection_pool(self.account_id)

        # Metric flags for sync performance
        self.is_initial_sync = False
        self.is_first_sync = False
        self.is_first_message = False

        with session_scope() as db_session:
            account = Account.get(self.account_id, db_session)
            self.namespace_id = account.namespace.id
            assert self.namespace_id is not None, "namespace_id is None"

            # A missing initial_sync_end means the initial sync is still in
            # progress; a missing initial_sync_start means it never started.
            folder = Folder.get(self.folder_id, db_session)
            if folder:
                self.is_initial_sync = folder.initial_sync_end is None
                self.is_first_sync = folder.initial_sync_start is None
                self.is_first_message = self.is_first_sync

        # Dispatch table for the folder-sync state machine.
        self.state_handlers = {
            'initial': self.initial_sync,
            'initial uidinvalid': self.resync_uids,
            'poll': self.poll,
            'poll uidinvalid': self.resync_uids,
        }

        Greenlet.__init__(self)

        self.heartbeat_status = HeartbeatStatusProxy(self.account_id,
                                                     self.folder_id,
                                                     self.folder_name,
                                                     email_address,
                                                     self.provider_name)
Example #17
0
def remove_deleted_uids(account_id, folder_id, uids, session):
    """
    Delete ImapUid rows for expunged uids, then clean up the affected
    messages (drafts are deleted synchronously; other orphans are marked
    for asynchronous deletion).

    Make sure you're holding a db write lock on the account. (We don't try
    to grab the lock in here in case the caller needs to put higher-level
    functionality in the lock.)

    """
    if uids:
        deletes = session.query(ImapUid).filter(
            ImapUid.account_id == account_id, ImapUid.folder_id == folder_id,
            ImapUid.msg_uid.in_(uids)).all()
        affected_messages = {
            uid.message
            for uid in deletes if uid.message is not None
        }

        for uid in deletes:
            session.delete(uid)
        session.commit()

        account = Account.get(account_id, session)

        for message in affected_messages:
            if not message.imapuids and message.is_draft:
                # Synchronously delete drafts.
                thread = message.thread
                # BUGFIX: guard against drafts with no thread; the
                # unguarded attribute access raised AttributeError when
                # message.thread was None (matches the guarded variants of
                # this function).
                if thread is not None:
                    thread.messages.remove(message)
                session.delete(message)
                if thread is not None and not thread.messages:
                    session.delete(thread)
            else:
                update_message_metadata(session, account, message,
                                        message.is_draft)
                if not message.imapuids:
                    # But don't outright delete messages. Just mark them as
                    # 'deleted' and wait for the asynchronous
                    # dangling-message-collector to delete them.
                    message.mark_for_deletion()

        log.info('Deleted expunged UIDs', count=len(deletes))
        session.commit()
Example #18
0
def remove_deleted_uids(account_id, folder_id, uids, session):
    """
    Delete ImapUid rows for expunged uids, then clean up the affected
    messages (drafts are deleted synchronously; other orphans are marked
    for asynchronous deletion).

    Make sure you're holding a db write lock on the account. (We don't try
    to grab the lock in here in case the caller needs to put higher-level
    functionality in the lock.)

    """
    if uids:
        deletes = session.query(ImapUid).filter(
            ImapUid.account_id == account_id,
            ImapUid.folder_id == folder_id,
            ImapUid.msg_uid.in_(uids)).all()
        affected_messages = {uid.message for uid in deletes
                             if uid.message is not None}

        for uid in deletes:
            session.delete(uid)
        session.commit()

        account = Account.get(account_id, session)

        for message in affected_messages:
            if not message.imapuids and message.is_draft:
                # Synchronously delete drafts.
                thread = message.thread
                # BUGFIX: guard against drafts with no thread; the
                # unguarded attribute access raised AttributeError when
                # message.thread was None (matches the guarded variants of
                # this function).
                if thread is not None:
                    thread.messages.remove(message)
                session.delete(message)
                if thread is not None and not thread.messages:
                    session.delete(thread)
            else:
                update_message_metadata(session, account, message,
                                        message.is_draft)
                if not message.imapuids:
                    # But don't outright delete messages. Just mark them as
                    # 'deleted' and wait for the asynchronous
                    # dangling-message-collector to delete them.
                    message.mark_for_deletion()

        log.info('Deleted expunged UIDs', count=len(deletes))
        session.commit()
Example #19
0
def test_canonicalization(db):
    """Email addresses are canonicalized for both storage and lookup."""
    from inbox.models import Namespace, Account
    namespace = Namespace()
    account = Account(namespace=namespace,
                      email_address='*****@*****.**')
    db.session.add(account)
    db.session.commit()
    assert account.email_address == '*****@*****.**'

    def count_for(addr):
        # How many stored accounts match this address after lookup-side
        # canonicalization?
        return db.session.query(Account).filter_by(
            email_address=addr).count()

    assert count_for('*****@*****.**') == 1

    assert count_for('*****@*****.**') == 1

    # Check that nothing bad happens if you pass something that can't actually
    # be parsed as an email address.
    assert count_for('foo') == 0
    # Flanker will parse hostnames too, don't break on that.
    assert count_for('http://example.com') == 0