Exemple #1
0
class ImapSyncMonitor(BaseMailSyncMonitor):
    """
    Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    poll_frequency: Integer
        Seconds to wait between polling for the greenlets spawned
    refresh_flags_max: Integer
        the maximum number of UIDs for which we'll check flags
        periodically.

    """
    def __init__(self, account,
                 heartbeat=1, refresh_frequency=30, poll_frequency=30,
                 retry_fail_classes=[], refresh_flags_max=2000):
        self.refresh_frequency = refresh_frequency
        self.poll_frequency = poll_frequency
        self.syncmanager_lock = BoundedSemaphore(1)
        self.refresh_flags_max = refresh_flags_max

        provider_supports_condstore = account.provider_info.get('condstore',
                                                                False)
        account_supports_condstore = getattr(account, 'supports_condstore',
                                             False)
        if provider_supports_condstore or account_supports_condstore:
            self.sync_engine_class = CondstoreFolderSyncEngine
        else:
            self.sync_engine_class = FolderSyncEngine

        self.folder_monitors = Group()

        BaseMailSyncMonitor.__init__(self, account, heartbeat,
                                     retry_fail_classes)

    @retry_crispin
    def prepare_sync(self):
        """Ensures that canonical tags are created for the account, and gets
        and save Folder objects for folders on the IMAP backend. Returns a list
        of tuples (folder_name, folder_id) for each folder we want to sync (in
        order)."""
        with mailsync_session_scope() as db_session:
            with _pool(self.account_id).get() as crispin_client:
                sync_folders = crispin_client.sync_folders()
                save_folder_names(log, self.account_id,
                                  crispin_client.folder_names(), db_session)

            sync_folder_names_ids = []
            for folder_name in sync_folders:
                try:
                    id_, = db_session.query(Folder.id). \
                        filter(Folder.name == folder_name,
                               Folder.account_id == self.account_id).one()
                    sync_folder_names_ids.append((folder_name, id_))
                except NoResultFound:
                    log.error("Missing Folder object when starting sync",
                              folder_name=folder_name)
                    raise MailsyncError("Missing Folder '{}' on account {}"
                                        .format(folder_name, self.account_id))
            return sync_folder_names_ids

    def start_new_folder_sync_engines(self, folders=set()):
        new_folders = [f for f in self.prepare_sync() if f not in folders]
        for folder_name, folder_id in new_folders:
            log.info('Folder sync engine started',
                     account_id=self.account_id,
                     folder_id=folder_id,
                     folder_name=folder_name)
            thread = self.sync_engine_class(self.account_id,
                                            folder_name,
                                            folder_id,
                                            self.email_address,
                                            self.provider_name,
                                            self.poll_frequency,
                                            self.syncmanager_lock,
                                            self.refresh_flags_max,
                                            self.retry_fail_classes)
            self.folder_monitors.start(thread)
            while not thread_polling(thread) and \
                    not thread_finished(thread) and \
                    not thread.ready():
                sleep(self.heartbeat)

            # allow individual folder sync monitors to shut themselves down
            # after completing the initial sync
            if thread_finished(thread) or thread.ready():
                log.info('Folder sync engine finished/killed',
                         account_id=self.account_id,
                         folder_id=folder_id,
                         folder_name=folder_name)
                # note: thread is automatically removed from
                # self.folder_monitors
            else:
                folders.add((folder_name, folder_id))

    def start_delete_handler(self):
        self.delete_handler = DeleteHandler(account_id=self.account_id,
                                            namespace_id=self.namespace_id,
                                            uid_accessor=lambda m: m.imapuids)
        self.delete_handler.start()

    def sync(self):
        self.start_delete_handler()
        folders = set()
        self.start_new_folder_sync_engines(folders)
        while True:
            sleep(self.refresh_frequency)
            self.start_new_folder_sync_engines(folders)
Exemple #2
0
class ImapSyncMonitor(BaseMailSyncMonitor):
    """
    Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    """
    def __init__(self, account, heartbeat=1, refresh_frequency=30):
        self.refresh_frequency = refresh_frequency
        self.syncmanager_lock = BoundedSemaphore(1)
        self.saved_remote_folders = None
        self.sync_engine_class = FolderSyncEngine

        self.folder_monitors = Group()
        self.delete_handler = None

        BaseMailSyncMonitor.__init__(self, account, heartbeat)

    @retry_crispin
    def prepare_sync(self):
        """
        Gets and save Folder objects for folders on the IMAP backend. Returns a
        list of folder names for the folders we want to sync (in order).
        """
        with connection_pool(self.account_id).get() as crispin_client:
            # Get a fresh list of the folder names from the remote
            remote_folders = crispin_client.folders()
            # The folders we should be syncing
            sync_folders = crispin_client.sync_folders()

        if self.saved_remote_folders != remote_folders:
            with session_scope(self.namespace_id) as db_session:
                self.save_folder_names(db_session, remote_folders)
                self.saved_remote_folders = remote_folders
        return sync_folders

    def save_folder_names(self, db_session, raw_folders):
        """
        Save the folders present on the remote backend for an account.

        * Create Folder objects.
        * Delete Folders that no longer exist on the remote.

        Notes
        -----
        Generic IMAP uses folders (not labels).
        Canonical folders ('inbox') and other folders are created as Folder
        objects only accordingly.

        We don't canonicalize folder names to lowercase when saving because
        different backends may be case-sensitive or otherwise - code that
        references saved folder names should canonicalize if needed when doing
        comparisons.

        """
        account = db_session.query(Account).get(self.account_id)
        remote_folder_names = {
            sanitize_name(f.display_name)
            for f in raw_folders
        }

        assert "inbox" in {f.role
                           for f in raw_folders
                           }, "Account {} has no detected inbox folder".format(
                               account.email_address)

        local_folders = {
            f.name: f
            for f in db_session.query(Folder).filter(
                Folder.account_id == self.account_id)
        }

        # Delete folders no longer present on the remote.
        # Note that the folder with canonical_name='inbox' cannot be deleted;
        # remote_folder_names will always contain an entry corresponding to it.
        discard = set(local_folders) - remote_folder_names
        for name in discard:
            log.info("Folder deleted from remote",
                     account_id=self.account_id,
                     name=name)
            if local_folders[name].category_id is not None:
                cat = db_session.query(Category).get(
                    local_folders[name].category_id)
                if cat is not None:
                    db_session.delete(cat)
            del local_folders[name]

        # Create new folders
        for raw_folder in raw_folders:
            folder = Folder.find_or_create(db_session, account,
                                           raw_folder.display_name,
                                           raw_folder.role)
            if folder.canonical_name != raw_folder.role:
                folder.canonical_name = raw_folder.role

        # Set the should_run bit for existing folders to True (it's True by
        # default for new ones.)
        for f in local_folders.values():
            if f.imapsyncstatus:
                f.imapsyncstatus.sync_should_run = True

        db_session.commit()

    def start_new_folder_sync_engines(self):
        running_monitors = {
            monitor.folder_name: monitor
            for monitor in self.folder_monitors
        }

        for folder_name in self.prepare_sync():
            if folder_name in running_monitors:
                thread = running_monitors[folder_name]
            else:
                log.info(
                    "Folder sync engine started",
                    account_id=self.account_id,
                    folder_name=folder_name,
                )
                thread = self.sync_engine_class(
                    self.account_id,
                    self.namespace_id,
                    folder_name,
                    self.email_address,
                    self.provider_name,
                    self.syncmanager_lock,
                )
                self.folder_monitors.start(thread)

            while thread.state != "poll" and not thread.ready():
                sleep(self.heartbeat)

            if thread.ready():
                log.info(
                    "Folder sync engine exited",
                    account_id=self.account_id,
                    folder_name=folder_name,
                    error=thread.exception,
                )

    def start_delete_handler(self):
        if self.delete_handler is None:
            self.delete_handler = DeleteHandler(
                account_id=self.account_id,
                namespace_id=self.namespace_id,
                provider_name=self.provider_name,
                uid_accessor=lambda m: m.imapuids,
            )
            self.delete_handler.start()

    def sync(self):
        try:
            self.start_delete_handler()
            self.start_new_folder_sync_engines()
            while True:
                sleep(self.refresh_frequency)
                self.start_new_folder_sync_engines()
        except ValidationError as exc:
            log.error(
                "Error authenticating; stopping sync",
                exc_info=True,
                account_id=self.account_id,
                logstash_tag="mark_invalid",
            )
            with session_scope(self.namespace_id) as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.mark_invalid()
                account.update_sync_error(exc)
Exemple #3
0
class ImapSyncMonitor(BaseMailSyncMonitor):
    """
    Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    """

    def __init__(self, account,
                 heartbeat=1, refresh_frequency=30):
        self.refresh_frequency = refresh_frequency
        self.syncmanager_lock = BoundedSemaphore(1)
        self.saved_remote_folders = None
        self.sync_engine_class = FolderSyncEngine

        self.folder_monitors = Group()
        self.delete_handler = None

        BaseMailSyncMonitor.__init__(self, account, heartbeat)

    @retry_crispin
    def prepare_sync(self):
        """
        Gets and save Folder objects for folders on the IMAP backend. Returns a
        list of folder names for the folders we want to sync (in order).
        """
        with connection_pool(self.account_id).get() as crispin_client:
            # Get a fresh list of the folder names from the remote
            remote_folders = crispin_client.folders()
            # The folders we should be syncing
            sync_folders = crispin_client.sync_folders()

        if self.saved_remote_folders != remote_folders:
            with session_scope(self.namespace_id) as db_session:
                self.save_folder_names(db_session, remote_folders)
                self.saved_remote_folders = remote_folders

        return sync_folders

    def save_folder_names(self, db_session, raw_folders):
        """
        Save the folders present on the remote backend for an account.

        * Create Folder objects.
        * Delete Folders that no longer exist on the remote.

        Notes
        -----
        Generic IMAP uses folders (not labels).
        Canonical folders ('inbox') and other folders are created as Folder
        objects only accordingly.

        We don't canonicalize folder names to lowercase when saving because
        different backends may be case-sensitive or otherwise - code that
        references saved folder names should canonicalize if needed when doing
        comparisons.

        """
        account = db_session.query(Account).get(self.account_id)
        remote_folder_names = {f.display_name.rstrip()[:MAX_FOLDER_NAME_LENGTH]
                               for f in raw_folders}

        assert 'inbox' in {f.role for f in raw_folders},\
            'Account {} has no detected inbox folder'.\
            format(account.email_address)

        local_folders = {f.name: f for f in db_session.query(Folder).filter(
                         Folder.account_id == self.account_id)}

        # Delete folders no longer present on the remote.
        # Note that the folder with canonical_name='inbox' cannot be deleted;
        # remote_folder_names will always contain an entry corresponding to it.
        discard = set(local_folders) - remote_folder_names
        for name in discard:
            log.info('Folder deleted from remote', account_id=self.account_id,
                     name=name)
            cat = db_session.query(Category).get(
                local_folders[name].category_id)
            if cat is not None:
                db_session.delete(cat)
            del local_folders[name]

        # Create new folders
        for raw_folder in raw_folders:
            Folder.find_or_create(db_session, account, raw_folder.display_name,
                                  raw_folder.role)
        # Set the should_run bit for existing folders to True (it's True by
        # default for new ones.)
        for f in local_folders.values():
            if f.imapsyncstatus:
                f.imapsyncstatus.sync_should_run = True

        db_session.commit()

    def start_new_folder_sync_engines(self):
        running_monitors = {monitor.folder_name: monitor for monitor in
                            self.folder_monitors}
        for folder_name in self.prepare_sync():
            if folder_name in running_monitors:
                thread = running_monitors[folder_name]
            else:
                log.info('Folder sync engine started',
                         account_id=self.account_id,
                         folder_name=folder_name)
                thread = self.sync_engine_class(self.account_id,
                                                self.namespace_id,
                                                folder_name,
                                                self.email_address,
                                                self.provider_name,
                                                self.syncmanager_lock)
                self.folder_monitors.start(thread)
            while not thread_polling(thread) and not thread.ready():
                sleep(self.heartbeat)

            if thread.ready():
                log.info('Folder sync engine exited',
                         account_id=self.account_id,
                         folder_name=folder_name,
                         error=thread.exception)

    def start_delete_handler(self):
        if self.delete_handler is None:
            self.delete_handler = DeleteHandler(
                account_id=self.account_id,
                namespace_id=self.namespace_id,
                provider_name=self.provider_name,
                uid_accessor=lambda m: m.imapuids)
            self.delete_handler.start()

    def sync(self):
        try:
            self.start_delete_handler()
            self.start_new_folder_sync_engines()
            while True:
                sleep(self.refresh_frequency)
                self.start_new_folder_sync_engines()
        except ValidationError as exc:
            log.error(
                'Error authenticating; stopping sync', exc_info=True,
                account_id=self.account_id, logstash_tag='mark_invalid')
            with session_scope(self.namespace_id) as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.mark_invalid()
                account.update_sync_error(str(exc))
Exemple #4
0
class ImapSyncMonitor(BaseMailSyncMonitor):
    """
    Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    poll_frequency: Integer
        Seconds to wait between polling for the greenlets spawned
    refresh_flags_max: Integer
        the maximum number of UIDs for which we'll check flags
        periodically.

    """
    def __init__(self,
                 account,
                 heartbeat=1,
                 refresh_frequency=30,
                 poll_frequency=30,
                 retry_fail_classes=[],
                 refresh_flags_max=2000):
        self.refresh_frequency = refresh_frequency
        self.poll_frequency = poll_frequency
        self.syncmanager_lock = BoundedSemaphore(1)
        self.refresh_flags_max = refresh_flags_max
        self.saved_remote_folders = None

        provider_supports_condstore = account.provider_info.get(
            'condstore', False)
        account_supports_condstore = getattr(account, 'supports_condstore',
                                             False)
        if provider_supports_condstore or account_supports_condstore:
            self.sync_engine_class = CondstoreFolderSyncEngine
        else:
            self.sync_engine_class = FolderSyncEngine

        self.folder_monitors = Group()

        BaseMailSyncMonitor.__init__(self, account, heartbeat,
                                     retry_fail_classes)

    @retry_crispin
    def prepare_sync(self):
        """
        Gets and save Folder objects for folders on the IMAP backend. Returns a
        list of tuples (folder_name, folder_id) for each folder we want to sync
        (in order).
        """
        with mailsync_session_scope() as db_session:
            with connection_pool(self.account_id).get() as crispin_client:
                # Get a fresh list of the folder names from the remote
                remote_folders = crispin_client.folders()
                if self.saved_remote_folders != remote_folders:
                    self.save_folder_names(db_session, remote_folders)
                    self.saved_remote_folders = remote_folders
                # The folders we should be syncing
                sync_folders = crispin_client.sync_folders()

            sync_folder_names_ids = []
            for folder_name in sync_folders:
                try:
                    id_, = db_session.query(Folder.id). \
                        filter(Folder.name == folder_name,
                               Folder.account_id == self.account_id).one()
                    sync_folder_names_ids.append((folder_name, id_))
                except NoResultFound:
                    log.error('Missing Folder object when starting sync',
                              folder_name=folder_name)
                    raise MailsyncError(
                        "Missing Folder '{}' on account {}".format(
                            folder_name, self.account_id))
            return sync_folder_names_ids

    def save_folder_names(self, db_session, raw_folders):
        """
        Save the folders present on the remote backend for an account.

        * Create Folder objects.
        * Delete Folders that no longer exist on the remote.

        Notes
        -----
        Generic IMAP uses folders (not labels).
        Canonical folders ('inbox') and other folders are created as Folder
        objects only accordingly.

        We don't canonicalize folder names to lowercase when saving because
        different backends may be case-sensitive or otherwise - code that
        references saved folder names should canonicalize if needed when doing
        comparisons.

        """
        account = db_session.query(Account).get(self.account_id)
        remote_folder_names = {
            f.display_name.rstrip()[:MAX_FOLDER_NAME_LENGTH]
            for f in raw_folders
        }

        assert 'inbox' in {f.role for f in raw_folders},\
            'Account {} has no detected inbox folder'.\
            format(account.email_address)

        local_folders = {
            f.name: f
            for f in db_session.query(Folder).filter(
                Folder.account_id == self.account_id)
        }

        # Delete folders no longer present on the remote.
        # Note that the folder with canonical_name='inbox' cannot be deleted;
        # remote_folder_names will always contain an entry corresponding to it.
        discard = set(local_folders) - remote_folder_names
        for name in discard:
            log.info('Folder deleted from remote',
                     account_id=self.account_id,
                     name=name)
            db_session.delete(local_folders[name])
            del local_folders[name]

        # Create new folders
        for raw_folder in raw_folders:
            Folder.find_or_create(db_session, account, raw_folder.display_name,
                                  raw_folder.role)
        # Set the should_run bit for existing folders to True (it's True by
        # default for new ones.)
        for f in local_folders.values():
            if f.imapsyncstatus:
                f.imapsyncstatus.sync_should_run = True

        db_session.commit()

    def start_new_folder_sync_engines(self, folders=set()):
        new_folders = [f for f in self.prepare_sync() if f not in folders]
        for folder_name, folder_id in new_folders:
            log.info('Folder sync engine started',
                     account_id=self.account_id,
                     folder_id=folder_id,
                     folder_name=folder_name)
            thread = self.sync_engine_class(
                self.account_id, folder_name, folder_id, self.email_address,
                self.provider_name, self.poll_frequency, self.syncmanager_lock,
                self.refresh_flags_max, self.retry_fail_classes)
            self.folder_monitors.start(thread)
            while not thread_polling(thread) and \
                    not thread_finished(thread) and \
                    not thread.ready():
                sleep(self.heartbeat)

            # allow individual folder sync monitors to shut themselves down
            # after completing the initial sync
            if thread_finished(thread) or thread.ready():
                if thread.exception:
                    # Exceptions causing the folder sync to exit should not
                    # clear the heartbeat.
                    log.info('Folder sync engine exited with error',
                             account_id=self.account_id,
                             folder_id=folder_id,
                             folder_name=folder_name,
                             error=thread.exception)
                else:
                    log.info('Folder sync engine finished',
                             account_id=self.account_id,
                             folder_id=folder_id,
                             folder_name=folder_name)
                    # clear the heartbeat for this folder-thread since it
                    # exited cleanly.
                    clear_heartbeat_status(self.account_id, folder_id)

                # note: thread is automatically removed from
                # self.folder_monitors
            else:
                folders.add((folder_name, folder_id))

    def start_delete_handler(self):
        self.delete_handler = DeleteHandler(account_id=self.account_id,
                                            namespace_id=self.namespace_id,
                                            uid_accessor=lambda m: m.imapuids)
        self.delete_handler.start()

    def sync(self):
        try:
            self.start_delete_handler()
            folders = set()
            self.start_new_folder_sync_engines(folders)
            while True:
                sleep(self.refresh_frequency)
                self.start_new_folder_sync_engines(folders)
        except ValidationError as exc:
            log.error('Error authenticating; stopping sync',
                      exc_info=True,
                      account_id=self.account_id,
                      logstash_tag='mark_invalid')
            with mailsync_session_scope() as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.mark_invalid()
                account.update_sync_error(str(exc))
Exemple #5
0
class ImapSyncMonitor(BaseMailSyncMonitor):
    """
    Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    poll_frequency: Integer
        Seconds to wait between polling for the greenlets spawned
    refresh_flags_max: Integer
        the maximum number of UIDs for which we'll check flags
        periodically.

    """
    def __init__(self, account,
                 heartbeat=1, refresh_frequency=30, poll_frequency=30,
                 retry_fail_classes=[], refresh_flags_max=2000):
        self.refresh_frequency = refresh_frequency
        self.poll_frequency = poll_frequency
        self.syncmanager_lock = BoundedSemaphore(1)
        self.refresh_flags_max = refresh_flags_max
        self.saved_remote_folders = None

        provider_supports_condstore = account.provider_info.get('condstore',
                                                                False)
        account_supports_condstore = getattr(account, 'supports_condstore',
                                             False)
        if provider_supports_condstore or account_supports_condstore:
            self.sync_engine_class = CondstoreFolderSyncEngine
        else:
            self.sync_engine_class = FolderSyncEngine

        self.folder_monitors = Group()

        BaseMailSyncMonitor.__init__(self, account, heartbeat,
                                     retry_fail_classes)

    @retry_crispin
    def prepare_sync(self):
        """
        Gets and save Folder objects for folders on the IMAP backend. Returns a
        list of tuples (folder_name, folder_id) for each folder we want to sync
        (in order).
        """
        with mailsync_session_scope() as db_session:
            with connection_pool(self.account_id).get() as crispin_client:
                # Get a fresh list of the folder names from the remote
                remote_folders = crispin_client.folders()
                if self.saved_remote_folders != remote_folders:
                    self.save_folder_names(db_session, remote_folders)
                    self.saved_remote_folders = remote_folders
                # The folders we should be syncing
                sync_folders = crispin_client.sync_folders()

            sync_folder_names_ids = []
            for folder_name in sync_folders:
                try:
                    id_, = db_session.query(Folder.id). \
                        filter(Folder.name == folder_name,
                               Folder.account_id == self.account_id).one()
                    sync_folder_names_ids.append((folder_name, id_))
                except NoResultFound:
                    log.error('Missing Folder object when starting sync',
                              folder_name=folder_name)
                    raise MailsyncError("Missing Folder '{}' on account {}"
                                        .format(folder_name, self.account_id))
            return sync_folder_names_ids

    def save_folder_names(self, db_session, raw_folders):
        """
        Save the folders present on the remote backend for an account.

        * Create Folder objects.
        * Delete Folders that no longer exist on the remote.

        Notes
        -----
        Generic IMAP uses folders (not labels).
        Canonical folders ('inbox') and other folders are created as Folder
        objects only accordingly.

        We don't canonicalize folder names to lowercase when saving because
        different backends may be case-sensitive or otherwise - code that
        references saved folder names should canonicalize if needed when doing
        comparisons.

        """
        account = db_session.query(Account).get(self.account_id)
        remote_folder_names = {f.display_name.rstrip()[:MAX_FOLDER_NAME_LENGTH]
                               for f in raw_folders}

        assert 'inbox' in {f.role for f in raw_folders},\
            'Account {} has no detected inbox folder'.\
            format(account.email_address)

        local_folders = {f.name: f for f in db_session.query(Folder).filter(
                         Folder.account_id == self.account_id)}

        # Delete folders no longer present on the remote.
        # Note that the folder with canonical_name='inbox' cannot be deleted;
        # remote_folder_names will always contain an entry corresponding to it.
        discard = set(local_folders) - remote_folder_names
        for name in discard:
            log.info('Folder deleted from remote', account_id=self.account_id,
                     name=name)
            db_session.delete(local_folders[name])
            del local_folders[name]

        # Create new folders
        for raw_folder in raw_folders:
            Folder.find_or_create(db_session, account, raw_folder.display_name,
                                  raw_folder.role)
        # Set the should_run bit for existing folders to True (it's True by
        # default for new ones.)
        for f in local_folders.values():
            if f.imapsyncstatus:
                f.imapsyncstatus.sync_should_run = True

        db_session.commit()

    def start_new_folder_sync_engines(self, folders=set()):
        new_folders = [f for f in self.prepare_sync() if f not in folders]
        for folder_name, folder_id in new_folders:
            log.info('Folder sync engine started',
                     account_id=self.account_id,
                     folder_id=folder_id,
                     folder_name=folder_name)
            thread = self.sync_engine_class(self.account_id,
                                            folder_name,
                                            folder_id,
                                            self.email_address,
                                            self.provider_name,
                                            self.poll_frequency,
                                            self.syncmanager_lock,
                                            self.refresh_flags_max,
                                            self.retry_fail_classes)
            self.folder_monitors.start(thread)
            while not thread_polling(thread) and \
                    not thread_finished(thread) and \
                    not thread.ready():
                sleep(self.heartbeat)

            # allow individual folder sync monitors to shut themselves down
            # after completing the initial sync
            if thread_finished(thread) or thread.ready():
                if thread.exception:
                    # Exceptions causing the folder sync to exit should not
                    # clear the heartbeat.
                    log.info('Folder sync engine exited with error',
                             account_id=self.account_id,
                             folder_id=folder_id,
                             folder_name=folder_name,
                             error=thread.exception)
                else:
                    log.info('Folder sync engine finished',
                             account_id=self.account_id,
                             folder_id=folder_id,
                             folder_name=folder_name)
                    # clear the heartbeat for this folder-thread since it
                    # exited cleanly.
                    clear_heartbeat_status(self.account_id, folder_id)

                # note: thread is automatically removed from
                # self.folder_monitors
            else:
                folders.add((folder_name, folder_id))

    def start_delete_handler(self):
        self.delete_handler = DeleteHandler(account_id=self.account_id,
                                            namespace_id=self.namespace_id,
                                            uid_accessor=lambda m: m.imapuids)
        self.delete_handler.start()

    def sync(self):
        try:
            self.start_delete_handler()
            folders = set()
            self.start_new_folder_sync_engines(folders)
            while True:
                sleep(self.refresh_frequency)
                self.start_new_folder_sync_engines(folders)
        except ValidationError as exc:
            log.error(
                'Error authenticating; stopping sync', exc_info=True,
                account_id=self.account_id, logstash_tag='mark_invalid')
            with mailsync_session_scope() as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.mark_invalid()
                account.update_sync_error(str(exc))
Exemple #6
0
class ImapSyncMonitor(BaseMailSyncMonitor):
    """
    Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    (DEPRECATED) refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    syncback_frequency: Integer
        Seconds to wait between performing consecutive syncback iterations and
        checking for new folders to sync.

    """
    def __init__(self,
                 account,
                 heartbeat=1,
                 refresh_frequency=30,
                 syncback_frequency=5):
        # DEPRECATED.
        # TODO[k]: Remove after sync-syncback integration deploy is complete.
        self.refresh_frequency = refresh_frequency
        self.syncmanager_lock = BoundedSemaphore(1)
        self.saved_remote_folders = None
        self.sync_engine_class = FolderSyncEngine
        self.folder_monitors = Group()

        self.delete_handler = None

        self.syncback_handler = None
        self.folder_sync_signals = {}
        self.syncback_timestamp = None
        self.syncback_frequency = syncback_frequency

        BaseMailSyncMonitor.__init__(self, account, heartbeat)

    @retry_crispin
    def prepare_sync(self):
        """
        Gets and save Folder objects for folders on the IMAP backend. Returns a
        list of folder names for the folders we want to sync (in order).

        """
        with connection_pool(self.account_id).get() as crispin_client:
            # Get a fresh list of the folder names from the remote
            remote_folders = crispin_client.folders()
            # The folders we should be syncing
            sync_folders = crispin_client.sync_folders()

        if self.saved_remote_folders != remote_folders:
            with session_scope(self.namespace_id) as db_session:
                self.save_folder_names(db_session, remote_folders)
                self.saved_remote_folders = remote_folders
        return sync_folders

    def save_folder_names(self, db_session, raw_folders):
        """
        Save the folders present on the remote backend for an account.

        * Create Folder objects.
        * Delete Folders that no longer exist on the remote.

        Notes
        -----
        Generic IMAP uses folders (not labels).
        Canonical folders ('inbox') and other folders are created as Folder
        objects only accordingly.

        We don't canonicalize folder names to lowercase when saving because
        different backends may be case-sensitive or otherwise - code that
        references saved folder names should canonicalize if needed when doing
        comparisons.

        """
        account = db_session.query(Account).get(self.account_id)
        remote_folder_names = {
            f.display_name.rstrip()[:MAX_FOLDER_NAME_LENGTH]
            for f in raw_folders
        }

        assert 'inbox' in {f.role for f in raw_folders},\
            'Account {} has no detected inbox folder'.\
            format(account.email_address)

        local_folders = {
            f.name: f
            for f in db_session.query(Folder).filter(
                Folder.account_id == self.account_id)
        }

        # Delete folders no longer present on the remote.
        # Note that the folder with canonical_name='inbox' cannot be deleted;
        # remote_folder_names will always contain an entry corresponding to it.
        discard = set(local_folders) - remote_folder_names
        for name in discard:
            log.info('Folder deleted from remote',
                     account_id=self.account_id,
                     name=name)
            if local_folders[name].category_id is not None:
                cat = db_session.query(Category).get(
                    local_folders[name].category_id)
                if cat is not None:
                    db_session.delete(cat)
            del local_folders[name]

        # Create new folders
        for raw_folder in raw_folders:
            Folder.find_or_create(db_session, account, raw_folder.display_name,
                                  raw_folder.role)
        # Set the should_run bit for existing folders to True (it's True by
        # default for new ones.)
        for f in local_folders.values():
            if f.imapsyncstatus:
                f.imapsyncstatus.sync_should_run = True

        db_session.commit()

    def start_new_folder_sync_engines(self):
        running_monitors = {
            monitor.folder_name: monitor
            for monitor in self.folder_monitors
        }
        with session_scope(self.namespace_id) as db_session:
            account = db_session.query(Account).options(
                load_only('_sync_status')).get(self.account_id)
            s3_resync = account._sync_status.get('s3_resync', False)

        for folder_name in self.prepare_sync():
            if folder_name in running_monitors:
                thread = running_monitors[folder_name]
            else:
                log.info('Folder sync engine started',
                         account_id=self.account_id,
                         folder_name=folder_name)
                self._add_sync_signal(folder_name)
                thread = self.sync_engine_class(
                    self.account_id, self.namespace_id, folder_name,
                    self.email_address, self.provider_name,
                    self.syncmanager_lock,
                    self.folder_sync_signals[folder_name])
                self.folder_monitors.start(thread)

                if s3_resync:
                    log.info('Starting an S3 monitor',
                             account_id=self.account_id)
                    s3_thread = S3FolderSyncEngine(self.account_id,
                                                   self.namespace_id,
                                                   folder_name,
                                                   self.email_address,
                                                   self.provider_name,
                                                   self.syncmanager_lock, None)
                    self.folder_monitors.start(s3_thread)

            while not thread.state == 'poll' and not thread.ready():
                sleep(self.heartbeat)
                self.perform_syncback()

            if thread.ready():
                self._remove_sync_signal[folder_name]
                log.info('Folder sync engine exited',
                         account_id=self.account_id,
                         folder_name=folder_name,
                         error=thread.exception)

    def start_delete_handler(self):
        if self.delete_handler is None:
            self.delete_handler = DeleteHandler(
                account_id=self.account_id,
                namespace_id=self.namespace_id,
                provider_name=self.provider_name,
                uid_accessor=lambda m: m.imapuids)
            self.delete_handler.start()

    def perform_syncback(self):
        """
        Perform syncback for the account.

        Syncback is performed iff all folder syncs are paused, and the previous
        syncback occurred more than syncback_frequency seconds ago.

        The first condition is checked by the call to _can_syncback().
        The second condition is needed because if there are a large number of
        pending actions during initial sync, it could repeatedly get interrupted
        and put on hold for seconds at a time.

        """
        from inbox.syncback.base import SyncbackHandler

        if not self._can_syncback():
            log.info('Skipping syncback', reason='folder syncs running')
            return

        if (self.syncback_timestamp
                and (datetime.utcnow() - self.syncback_timestamp).seconds <
                self.syncback_frequency):
            log.info('Skipping syncback',
                     reason='last syncback < syncback_frequency seconds ago',
                     syncback_frequency=self.syncback_frequency)
            # Reset here so syncs can proceed
            self._signal_syncs()
            return

        if self.syncback_handler is None:
            self.syncback_handler = SyncbackHandler(self.account_id,
                                                    self.namespace_id,
                                                    self.provider_name)
        try:
            interval = ((datetime.utcnow() - self.syncback_timestamp).seconds
                        if self.syncback_timestamp else None)
            log.info('Performing syncback',
                     syncback_interval_in_seconds=interval)
            self.syncback_handler.send_client_changes()
            self.syncback_timestamp = datetime.utcnow()
        except Exception:
            # Log, set self.folder_sync_signals and then re-raise (so the
            # greenlet can be restarted etc.)
            log.error('Critical syncback error', exc_info=True)
            raise
        finally:
            # Reset here so syncs can proceed
            self._signal_syncs()

    def sync(self):
        try:
            self.start_delete_handler()
            self.start_new_folder_sync_engines()
            while True:
                sleep(self.syncback_frequency)
                self.perform_syncback()
                self.start_new_folder_sync_engines()
        except ValidationError as exc:
            log.error('Error authenticating; stopping sync',
                      exc_info=True,
                      account_id=self.account_id,
                      logstash_tag='mark_invalid')
            with session_scope(self.namespace_id) as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.mark_invalid()
                account.update_sync_error(str(exc))

    def _add_sync_signal(self, folder_name):
        self.folder_sync_signals[folder_name] = Event()
        self.folder_sync_signals[folder_name].set()

    def _remove_sync_signal(self, folder_name):
        del self.folder_sync_signals[folder_name]

    def _can_syncback(self):
        """
        Determine if syncback can occur.

        If all folder syncs are paused as indicated by the folder_sync_signals,
        returns True. Else, returns False.

        """
        return (not self.folder_sync_signals
                or all(not signal.is_set()
                       for signal in self.folder_sync_signals.values()))

    def _signal_syncs(self):
        """ Indicate that folder syncs can resume. """
        for signal in self.folder_sync_signals.values():
            signal.set()
class ImapSyncMonitor(BaseMailSyncMonitor):
    """
    Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    poll_frequency: Integer
        Seconds to wait between polling for the greenlets spawned
    refresh_flags_max: Integer
        the maximum number of UIDs for which we'll check flags
        periodically.

    """
    def __init__(self,
                 account,
                 heartbeat=1,
                 refresh_frequency=30,
                 poll_frequency=30,
                 retry_fail_classes=[],
                 refresh_flags_max=2000):
        self.refresh_frequency = refresh_frequency
        self.poll_frequency = poll_frequency
        self.syncmanager_lock = BoundedSemaphore(1)
        self.refresh_flags_max = refresh_flags_max

        provider_supports_condstore = account.provider_info.get(
            'condstore', False)
        account_supports_condstore = getattr(account, 'supports_condstore',
                                             False)
        if provider_supports_condstore or account_supports_condstore:
            self.sync_engine_class = CondstoreFolderSyncEngine
        else:
            self.sync_engine_class = FolderSyncEngine

        self.folder_monitors = Group()

        BaseMailSyncMonitor.__init__(self, account, heartbeat,
                                     retry_fail_classes)

    @retry_crispin
    def prepare_sync(self):
        """Ensures that canonical tags are created for the account, and gets
        and save Folder objects for folders on the IMAP backend. Returns a list
        of tuples (folder_name, folder_id) for each folder we want to sync (in
        order)."""
        with mailsync_session_scope() as db_session:
            with _pool(self.account_id).get() as crispin_client:
                sync_folders = crispin_client.sync_folders()
                save_folder_names(log, self.account_id,
                                  crispin_client.folder_names(), db_session)

            sync_folder_names_ids = []
            for folder_name in sync_folders:
                try:
                    id_, = db_session.query(Folder.id). \
                        filter(Folder.name == folder_name,
                               Folder.account_id == self.account_id).one()
                    sync_folder_names_ids.append((folder_name, id_))
                except NoResultFound:
                    log.error("Missing Folder object when starting sync",
                              folder_name=folder_name)
                    raise MailsyncError(
                        "Missing Folder '{}' on account {}".format(
                            folder_name, self.account_id))
            return sync_folder_names_ids

    def start_new_folder_sync_engines(self, folders=set()):
        new_folders = [f for f in self.prepare_sync() if f not in folders]
        for folder_name, folder_id in new_folders:
            log.info('Folder sync engine started',
                     account_id=self.account_id,
                     folder_id=folder_id,
                     folder_name=folder_name)
            thread = self.sync_engine_class(
                self.account_id, folder_name, folder_id, self.email_address,
                self.provider_name, self.poll_frequency, self.syncmanager_lock,
                self.refresh_flags_max, self.retry_fail_classes)
            self.folder_monitors.start(thread)
            while not thread_polling(thread) and \
                    not thread_finished(thread) and \
                    not thread.ready():
                sleep(self.heartbeat)

            # allow individual folder sync monitors to shut themselves down
            # after completing the initial sync
            if thread_finished(thread) or thread.ready():
                log.info('Folder sync engine finished/killed',
                         account_id=self.account_id,
                         folder_id=folder_id,
                         folder_name=folder_name)
                # note: thread is automatically removed from
                # self.folder_monitors
            else:
                folders.add((folder_name, folder_id))

    def start_delete_handler(self):
        self.delete_handler = DeleteHandler(account_id=self.account_id,
                                            namespace_id=self.namespace_id,
                                            uid_accessor=lambda m: m.imapuids)
        self.delete_handler.start()

    def sync(self):
        self.start_delete_handler()
        folders = set()
        self.start_new_folder_sync_engines(folders)
        while True:
            sleep(self.refresh_frequency)
            self.start_new_folder_sync_engines(folders)
Exemple #8
0
class ImapSyncMonitor(BaseMailSyncMonitor):
    """
    Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    (DEPRECATED) refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    syncback_frequency: Integer
        Seconds to wait between performing consecutive syncback iterations and
        checking for new folders to sync.

    """
    def __init__(self, account, heartbeat=1, refresh_frequency=30,
                 syncback_frequency=5):
        # DEPRECATED.
        # TODO[k]: Remove after sync-syncback integration deploy is complete.
        self.refresh_frequency = refresh_frequency
        self.syncmanager_lock = BoundedSemaphore(1)
        self.saved_remote_folders = None
        self.sync_engine_class = FolderSyncEngine
        self.folder_monitors = Group()

        self.delete_handler = None

        self.syncback_handler = None
        self.folder_sync_signals = {}
        self.syncback_timestamp = None
        self.syncback_frequency = syncback_frequency

        BaseMailSyncMonitor.__init__(self, account, heartbeat)

    @retry_crispin
    def prepare_sync(self):
        """
        Gets and save Folder objects for folders on the IMAP backend. Returns a
        list of folder names for the folders we want to sync (in order).

        """
        with connection_pool(self.account_id).get() as crispin_client:
            # Get a fresh list of the folder names from the remote
            remote_folders = crispin_client.folders()
            # The folders we should be syncing
            sync_folders = crispin_client.sync_folders()

        if self.saved_remote_folders != remote_folders:
            with session_scope(self.namespace_id) as db_session:
                self.save_folder_names(db_session, remote_folders)
                self.saved_remote_folders = remote_folders
        return sync_folders

    def save_folder_names(self, db_session, raw_folders):
        """
        Save the folders present on the remote backend for an account.

        * Create Folder objects.
        * Delete Folders that no longer exist on the remote.

        Notes
        -----
        Generic IMAP uses folders (not labels).
        Canonical folders ('inbox') and other folders are created as Folder
        objects only accordingly.

        We don't canonicalize folder names to lowercase when saving because
        different backends may be case-sensitive or otherwise - code that
        references saved folder names should canonicalize if needed when doing
        comparisons.

        """
        account = db_session.query(Account).get(self.account_id)
        remote_folder_names = {f.display_name.rstrip()[:MAX_FOLDER_NAME_LENGTH]
                               for f in raw_folders}

        assert 'inbox' in {f.role for f in raw_folders},\
            'Account {} has no detected inbox folder'.\
            format(account.email_address)

        local_folders = {f.name: f for f in db_session.query(Folder).filter(
                         Folder.account_id == self.account_id)}

        # Delete folders no longer present on the remote.
        # Note that the folder with canonical_name='inbox' cannot be deleted;
        # remote_folder_names will always contain an entry corresponding to it.
        discard = set(local_folders) - remote_folder_names
        for name in discard:
            log.info('Folder deleted from remote', account_id=self.account_id,
                     name=name)
            if local_folders[name].category_id is not None:
                cat = db_session.query(Category).get(
                    local_folders[name].category_id)
                if cat is not None:
                    db_session.delete(cat)
            del local_folders[name]

        # Create new folders
        for raw_folder in raw_folders:
            Folder.find_or_create(db_session, account, raw_folder.display_name,
                                  raw_folder.role)
        # Set the should_run bit for existing folders to True (it's True by
        # default for new ones.)
        for f in local_folders.values():
            if f.imapsyncstatus:
                f.imapsyncstatus.sync_should_run = True

        db_session.commit()

    def start_new_folder_sync_engines(self):
        running_monitors = {monitor.folder_name: monitor for monitor in
                            self.folder_monitors}
        with session_scope(self.namespace_id) as db_session:
            account = db_session.query(Account).options(
                load_only('_sync_status')).get(self.account_id)
            s3_resync = account._sync_status.get('s3_resync', False)

        for folder_name in self.prepare_sync():
            if folder_name in running_monitors:
                thread = running_monitors[folder_name]
            else:
                log.info('Folder sync engine started',
                         account_id=self.account_id, folder_name=folder_name)
                self._add_sync_signal(folder_name)
                thread = self.sync_engine_class(self.account_id,
                                                self.namespace_id,
                                                folder_name,
                                                self.email_address,
                                                self.provider_name,
                                                self.syncmanager_lock,
                                                self.folder_sync_signals[folder_name])
                self.folder_monitors.start(thread)

                if s3_resync:
                    log.info('Starting an S3 monitor',
                             account_id=self.account_id)
                    s3_thread = S3FolderSyncEngine(self.account_id,
                                                   self.namespace_id,
                                                   folder_name,
                                                   self.email_address,
                                                   self.provider_name,
                                                   self.syncmanager_lock,
                                                   None)
                    self.folder_monitors.start(s3_thread)

            while not thread.state == 'poll' and not thread.ready():
                sleep(self.heartbeat)
                self.perform_syncback()

            if thread.ready():
                self._remove_sync_signal[folder_name]
                log.info('Folder sync engine exited',
                         account_id=self.account_id,
                         folder_name=folder_name,
                         error=thread.exception)

    def start_delete_handler(self):
        if self.delete_handler is None:
            self.delete_handler = DeleteHandler(
                account_id=self.account_id,
                namespace_id=self.namespace_id,
                provider_name=self.provider_name,
                uid_accessor=lambda m: m.imapuids)
            self.delete_handler.start()

    def perform_syncback(self):
        """
        Perform syncback for the account.

        Syncback is performed iff all folder syncs are paused, and the previous
        syncback occurred more than syncback_frequency seconds ago.

        The first condition is checked by the call to _can_syncback().
        The second condition is needed because if there are a large number of
        pending actions during initial sync, it could repeatedly get interrupted
        and put on hold for seconds at a time.

        """
        from inbox.syncback.base import SyncbackHandler

        if not self._can_syncback():
            log.info('Skipping syncback', reason='folder syncs running')
            return

        if (self.syncback_timestamp and
                (datetime.utcnow() - self.syncback_timestamp).seconds <
                 self.syncback_frequency):
            log.info('Skipping syncback',
                     reason='last syncback < syncback_frequency seconds ago',
                     syncback_frequency=self.syncback_frequency)
            # Reset here so syncs can proceed
            self._signal_syncs()
            return

        if self.syncback_handler is None:
            self.syncback_handler = SyncbackHandler(self.account_id,
                                                    self.namespace_id,
                                                    self.provider_name)
        try:
            interval = ((datetime.utcnow() - self.syncback_timestamp).seconds
                        if self.syncback_timestamp else None)
            log.info('Performing syncback', syncback_interval_in_seconds=interval)
            self.syncback_handler.send_client_changes()
            self.syncback_timestamp = datetime.utcnow()
        except Exception:
            # Log, set self.folder_sync_signals and then re-raise (so the
            # greenlet can be restarted etc.)
            log.error('Critical syncback error', exc_info=True)
            raise
        finally:
            # Reset here so syncs can proceed
            self._signal_syncs()

    def sync(self):
        try:
            self.start_delete_handler()
            self.start_new_folder_sync_engines()
            while True:
                sleep(self.syncback_frequency)
                self.perform_syncback()
                self.start_new_folder_sync_engines()
        except ValidationError as exc:
            log.error(
                'Error authenticating; stopping sync', exc_info=True,
                account_id=self.account_id, logstash_tag='mark_invalid')
            with session_scope(self.namespace_id) as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.mark_invalid()
                account.update_sync_error(str(exc))

    def _add_sync_signal(self, folder_name):
        self.folder_sync_signals[folder_name] = Event()
        self.folder_sync_signals[folder_name].set()

    def _remove_sync_signal(self, folder_name):
        del self.folder_sync_signals[folder_name]

    def _can_syncback(self):
        """
        Determine if syncback can occur.

        If all folder syncs are paused as indicated by the folder_sync_signals,
        returns True. Else, returns False.

        """
        return (not self.folder_sync_signals or
                all(not signal.is_set() for signal in
                    self.folder_sync_signals.values()))

    def _signal_syncs(self):
        """ Indicate that folder syncs can resume. """
        for signal in self.folder_sync_signals.values():
            signal.set()