Example #1
import socket

from gevent import Greenlet
from gevent.pool import Group

# Note: Linker is a project-specific Greenlet defined elsewhere.


class Redirector(Greenlet):
    def __init__(self, msg):
        self.sock_type = msg.sock_type
        self.info = msg
        self.sock = socket.socket(socket.AF_INET, self.sock_type)
        self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.links = Group()
        super(Redirector, self).__init__()

    def _run(self):
        self.sock.bind(self.info.local)
        self.sock.listen(64)

        while True:
            cli, addr = self.sock.accept()
            self.links.start(Linker(self.info.remote, self.sock_type, cli))

    def kill(self):
        self.links.kill()
        super(Redirector, self).kill()
        self.sock.close()

    def dump(self):
        return dict(protocol=self.info.protocol,
                    local='%s:%d' % self.info.local,
                    remote='%s:%d' % self.info.remote)
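
A minimal usage sketch for the Redirector above. The shape of `msg` and the Linker class are assumptions made for illustration; they are not shown in the example itself.

import socket
from collections import namedtuple

Msg = namedtuple('Msg', 'sock_type protocol local remote')

msg = Msg(sock_type=socket.SOCK_STREAM, protocol='tcp',
          local=('127.0.0.1', 8080), remote=('10.0.0.5', 80))

redirector = Redirector(msg)
redirector.start()           # bind, listen and accept in its own greenlet
print(redirector.dump())     # e.g. {'protocol': 'tcp', 'local': '127.0.0.1:8080', ...}
redirector.kill()            # kill the linker group, stop the listener, close the socket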
Example #2
from gevent.pool import Group

# create_app, CONFIG_MODULE, SubmitQueue and SubmitWorker are
# project-specific imports.


def main(workers):
    create_app(config=CONFIG_MODULE, config_logger=False)
    queue = SubmitQueue()
    worker_group = Group()
    for _ in range(workers):
        worker_group.start(SubmitWorker(queue))
    worker_group.join()
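
For context, `SubmitWorker` is presumably a `Greenlet` subclass that drains jobs from the queue. A minimal sketch of that shape (the queue and job interfaces are assumptions, not part of the example):

from gevent import Greenlet

class SubmitWorker(Greenlet):
    """Sketch of a queue-draining worker; SubmitQueue's interface is assumed."""
    def __init__(self, queue):
        super(SubmitWorker, self).__init__()
        self.queue = queue

    def _run(self):
        while True:
            submission = self.queue.get()   # block until a job is available
            self.process(submission)        # hypothetical per-job handler

    def process(self, submission):
        print('processing', submission)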
Example #3
    def start_node_monitor(self):
        def _monitor(runner: NodeRunner):
            while not self._runner.root_task.done:
                if runner.state is NodeState.STARTED:
                    runner.check()
                gevent.sleep(0.5)

        monitor_group = Group()
        for runner in self._node_runners:
            monitor_group.start(Greenlet(_monitor, runner))

        def _wait():
            while not monitor_group.join(0.5, raise_error=True):
                pass

        return gevent.spawn(_wait)
Example #4
    def start_node_monitor(self):
        def _monitor(runner: NodeRunner):
            while not self._runner.root_task.done:
                if runner.state is NodeState.STARTED:
                    try:
                        runner.executor.check_subprocess()
                    except ProcessExitedWithError as ex:
                        raise ScenarioError(
                            f'Raiden node {runner._index} died with non-zero exit status',
                        ) from ex
                gevent.sleep(0.5)

        monitor_group = Group()
        for runner in self._node_runners:
            monitor_group.start(Greenlet(_monitor, runner))

        def _wait():
            while not monitor_group.join(0.5, raise_error=True):
                pass

        return gevent.spawn(_wait)
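
The Group-plus-waiter pattern in the last two examples reduces to a self-contained sketch. All names below are illustrative; the loop relies on gevent's `Group.join(timeout, raise_error=True)` returning True once the group is empty, and re-raising the first exception from any greenlet in it.

import gevent
from gevent import Greenlet
from gevent.pool import Group

def monitor(name):
    for _ in range(3):
        print('checking', name)   # stand-in for runner.check()
        gevent.sleep(0.5)

monitor_group = Group()
for name in ('node-0', 'node-1'):
    monitor_group.start(Greenlet(monitor, name))

def wait():
    # Re-join with a short timeout so an exception in any monitor
    # surfaces promptly instead of being swallowed until shutdown.
    while not monitor_group.join(0.5, raise_error=True):
        pass

gevent.spawn(wait).get()   # blocks until all monitors finish (or one fails)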
Example #5
class ImapSyncMonitor(BaseMailSyncMonitor):
    """
    Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    """
    def __init__(self, account, heartbeat=1, refresh_frequency=30):
        self.refresh_frequency = refresh_frequency
        self.syncmanager_lock = BoundedSemaphore(1)
        self.saved_remote_folders = None
        self.sync_engine_class = FolderSyncEngine

        self.folder_monitors = Group()
        self.delete_handler = None

        BaseMailSyncMonitor.__init__(self, account, heartbeat)

    @retry_crispin
    def prepare_sync(self):
        """
        Gets and saves Folder objects for folders on the IMAP backend. Returns a
        list of folder names for the folders we want to sync (in order).
        """
        with connection_pool(self.account_id).get() as crispin_client:
            # Get a fresh list of the folder names from the remote
            remote_folders = crispin_client.folders()
            # The folders we should be syncing
            sync_folders = crispin_client.sync_folders()

        if self.saved_remote_folders != remote_folders:
            with session_scope(self.namespace_id) as db_session:
                self.save_folder_names(db_session, remote_folders)
                self.saved_remote_folders = remote_folders
        return sync_folders

    def save_folder_names(self, db_session, raw_folders):
        """
        Save the folders present on the remote backend for an account.

        * Create Folder objects.
        * Delete Folders that no longer exist on the remote.

        Notes
        -----
        Generic IMAP uses folders (not labels).
        Canonical folders ('inbox') and other folders alike are created as
        Folder objects.

        We don't canonicalize folder names to lowercase when saving because
        different backends may or may not be case-sensitive; code that
        references saved folder names should canonicalize as needed when
        doing comparisons.

        """
        account = db_session.query(Account).get(self.account_id)
        remote_folder_names = {
            sanitize_name(f.display_name)
            for f in raw_folders
        }

        assert "inbox" in {f.role
                           for f in raw_folders
                           }, "Account {} has no detected inbox folder".format(
                               account.email_address)

        local_folders = {
            f.name: f
            for f in db_session.query(Folder).filter(
                Folder.account_id == self.account_id)
        }

        # Delete folders no longer present on the remote.
        # Note that the folder with canonical_name='inbox' cannot be deleted;
        # remote_folder_names will always contain an entry corresponding to it.
        discard = set(local_folders) - remote_folder_names
        for name in discard:
            log.info("Folder deleted from remote",
                     account_id=self.account_id,
                     name=name)
            if local_folders[name].category_id is not None:
                cat = db_session.query(Category).get(
                    local_folders[name].category_id)
                if cat is not None:
                    db_session.delete(cat)
            del local_folders[name]

        # Create new folders
        for raw_folder in raw_folders:
            folder = Folder.find_or_create(db_session, account,
                                           raw_folder.display_name,
                                           raw_folder.role)
            if folder.canonical_name != raw_folder.role:
                folder.canonical_name = raw_folder.role

        # Set the should_run bit for existing folders to True (it's True by
        # default for new ones.)
        for f in local_folders.values():
            if f.imapsyncstatus:
                f.imapsyncstatus.sync_should_run = True

        db_session.commit()

    def start_new_folder_sync_engines(self):
        running_monitors = {
            monitor.folder_name: monitor
            for monitor in self.folder_monitors
        }

        for folder_name in self.prepare_sync():
            if folder_name in running_monitors:
                thread = running_monitors[folder_name]
            else:
                log.info(
                    "Folder sync engine started",
                    account_id=self.account_id,
                    folder_name=folder_name,
                )
                thread = self.sync_engine_class(
                    self.account_id,
                    self.namespace_id,
                    folder_name,
                    self.email_address,
                    self.provider_name,
                    self.syncmanager_lock,
                )
                self.folder_monitors.start(thread)

            while thread.state != "poll" and not thread.ready():
                sleep(self.heartbeat)

            if thread.ready():
                log.info(
                    "Folder sync engine exited",
                    account_id=self.account_id,
                    folder_name=folder_name,
                    error=thread.exception,
                )

    def start_delete_handler(self):
        if self.delete_handler is None:
            self.delete_handler = DeleteHandler(
                account_id=self.account_id,
                namespace_id=self.namespace_id,
                provider_name=self.provider_name,
                uid_accessor=lambda m: m.imapuids,
            )
            self.delete_handler.start()

    def sync(self):
        try:
            self.start_delete_handler()
            self.start_new_folder_sync_engines()
            while True:
                sleep(self.refresh_frequency)
                self.start_new_folder_sync_engines()
        except ValidationError as exc:
            log.error(
                "Error authenticating; stopping sync",
                exc_info=True,
                account_id=self.account_id,
                logstash_tag="mark_invalid",
            )
            with session_scope(self.namespace_id) as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.mark_invalid()
                account.update_sync_error(exc)
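
The start-then-poll loop in start_new_folder_sync_engines above reduces to a small pattern: start an engine greenlet in the Group, then sleep on a heartbeat until it either reaches its steady 'poll' state or exits. A self-contained sketch with a stub engine (FolderSyncEngine itself is not shown here):

import gevent
from gevent import Greenlet
from gevent.pool import Group

class EngineStub(Greenlet):
    """Stand-in for a folder sync engine that reaches a steady state."""
    state = 'initial'

    def _run(self):
        gevent.sleep(0.2)        # pretend to do the initial sync
        self.state = 'poll'      # steady state reached
        gevent.sleep(10)         # keep polling

folder_monitors = Group()
engine = EngineStub()
folder_monitors.start(engine)

heartbeat = 0.1
while engine.state != 'poll' and not engine.ready():
    gevent.sleep(heartbeat)

if engine.ready():
    print('engine exited:', engine.exception)
else:
    print('engine is polling')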
Example #6
class ImapSyncMonitor(BaseMailSyncMonitor):
    """
    Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    poll_frequency: Integer
        Seconds to wait between polls of the spawned folder sync greenlets.
    refresh_flags_max: Integer
        The maximum number of UIDs for which we'll periodically check
        flags.

    """
    def __init__(self,
                 account,
                 heartbeat=1,
                 refresh_frequency=30,
                 poll_frequency=30,
                 retry_fail_classes=[],
                 refresh_flags_max=2000):
        self.refresh_frequency = refresh_frequency
        self.poll_frequency = poll_frequency
        self.syncmanager_lock = BoundedSemaphore(1)
        self.refresh_flags_max = refresh_flags_max
        self.saved_remote_folders = None

        provider_supports_condstore = account.provider_info.get(
            'condstore', False)
        account_supports_condstore = getattr(account, 'supports_condstore',
                                             False)
        if provider_supports_condstore or account_supports_condstore:
            self.sync_engine_class = CondstoreFolderSyncEngine
        else:
            self.sync_engine_class = FolderSyncEngine

        self.folder_monitors = Group()

        BaseMailSyncMonitor.__init__(self, account, heartbeat,
                                     retry_fail_classes)

    @retry_crispin
    def prepare_sync(self):
        """
        Gets and saves Folder objects for folders on the IMAP backend. Returns a
        list of tuples (folder_name, folder_id) for each folder we want to sync
        (in order).
        """
        with mailsync_session_scope() as db_session:
            with connection_pool(self.account_id).get() as crispin_client:
                # Get a fresh list of the folder names from the remote
                remote_folders = crispin_client.folders()
                if self.saved_remote_folders != remote_folders:
                    self.save_folder_names(db_session, remote_folders)
                    self.saved_remote_folders = remote_folders
                # The folders we should be syncing
                sync_folders = crispin_client.sync_folders()

            sync_folder_names_ids = []
            for folder_name in sync_folders:
                try:
                    id_, = db_session.query(Folder.id). \
                        filter(Folder.name == folder_name,
                               Folder.account_id == self.account_id).one()
                    sync_folder_names_ids.append((folder_name, id_))
                except NoResultFound:
                    log.error('Missing Folder object when starting sync',
                              folder_name=folder_name)
                    raise MailsyncError(
                        "Missing Folder '{}' on account {}".format(
                            folder_name, self.account_id))
            return sync_folder_names_ids

    def save_folder_names(self, db_session, raw_folders):
        """
        Save the folders present on the remote backend for an account.

        * Create Folder objects.
        * Delete Folders that no longer exist on the remote.

        Notes
        -----
        Generic IMAP uses folders (not labels).
        Canonical folders ('inbox') and other folders alike are created as
        Folder objects.

        We don't canonicalize folder names to lowercase when saving because
        different backends may or may not be case-sensitive; code that
        references saved folder names should canonicalize as needed when
        doing comparisons.

        """
        account = db_session.query(Account).get(self.account_id)
        remote_folder_names = {
            f.display_name.rstrip()[:MAX_FOLDER_NAME_LENGTH]
            for f in raw_folders
        }

        assert 'inbox' in {f.role for f in raw_folders}, \
            'Account {} has no detected inbox folder'.format(
                account.email_address)

        local_folders = {
            f.name: f
            for f in db_session.query(Folder).filter(
                Folder.account_id == self.account_id)
        }

        # Delete folders no longer present on the remote.
        # Note that the folder with canonical_name='inbox' cannot be deleted;
        # remote_folder_names will always contain an entry corresponding to it.
        discard = set(local_folders) - remote_folder_names
        for name in discard:
            log.info('Folder deleted from remote',
                     account_id=self.account_id,
                     name=name)
            db_session.delete(local_folders[name])
            del local_folders[name]

        # Create new folders
        for raw_folder in raw_folders:
            Folder.find_or_create(db_session, account, raw_folder.display_name,
                                  raw_folder.role)
        # Set the should_run bit for existing folders to True (it's True by
        # default for new ones.)
        for f in local_folders.values():
            if f.imapsyncstatus:
                f.imapsyncstatus.sync_should_run = True

        db_session.commit()

    def start_new_folder_sync_engines(self, folders=None):
        # Avoid a shared mutable default; `folders` is mutated in place below.
        if folders is None:
            folders = set()
        new_folders = [f for f in self.prepare_sync() if f not in folders]
        for folder_name, folder_id in new_folders:
            log.info('Folder sync engine started',
                     account_id=self.account_id,
                     folder_id=folder_id,
                     folder_name=folder_name)
            thread = self.sync_engine_class(
                self.account_id, folder_name, folder_id, self.email_address,
                self.provider_name, self.poll_frequency, self.syncmanager_lock,
                self.refresh_flags_max, self.retry_fail_classes)
            self.folder_monitors.start(thread)
            while not thread_polling(thread) and \
                    not thread_finished(thread) and \
                    not thread.ready():
                sleep(self.heartbeat)

            # allow individual folder sync monitors to shut themselves down
            # after completing the initial sync
            if thread_finished(thread) or thread.ready():
                if thread.exception:
                    # Exceptions causing the folder sync to exit should not
                    # clear the heartbeat.
                    log.info('Folder sync engine exited with error',
                             account_id=self.account_id,
                             folder_id=folder_id,
                             folder_name=folder_name,
                             error=thread.exception)
                else:
                    log.info('Folder sync engine finished',
                             account_id=self.account_id,
                             folder_id=folder_id,
                             folder_name=folder_name)
                    # clear the heartbeat for this folder-thread since it
                    # exited cleanly.
                    clear_heartbeat_status(self.account_id, folder_id)

                # note: thread is automatically removed from
                # self.folder_monitors
            else:
                folders.add((folder_name, folder_id))

    def start_delete_handler(self):
        self.delete_handler = DeleteHandler(account_id=self.account_id,
                                            namespace_id=self.namespace_id,
                                            uid_accessor=lambda m: m.imapuids)
        self.delete_handler.start()

    def sync(self):
        try:
            self.start_delete_handler()
            folders = set()
            self.start_new_folder_sync_engines(folders)
            while True:
                sleep(self.refresh_frequency)
                self.start_new_folder_sync_engines(folders)
        except ValidationError as exc:
            log.error('Error authenticating; stopping sync',
                      exc_info=True,
                      account_id=self.account_id,
                      logstash_tag='mark_invalid')
            with mailsync_session_scope() as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.mark_invalid()
                account.update_sync_error(str(exc))
Example #7
    def start(self, greenlet=None):
        """Start the greenlet pool or add a greenlet to the pool."""
        if greenlet is not None:
            return Group.start(self, greenlet)
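
Assuming the method above lives on a `gevent.pool.Group` (or `Pool`) subclass, callers can hand it an un-started greenlet; called with no argument it is a no-op. A minimal sketch of the underlying behaviour:

from gevent import Greenlet
from gevent.pool import Group

pool = Group()
greenlet = Greenlet(print, 'hello from the pool')
pool.start(greenlet)   # track the greenlet in the pool and schedule it
pool.join()            # wait for the pool to drain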
Example #8
    def _run(self, *args, **kwargs):
        # Run every queued task in its own greenlet and wait for all of
        # them; raise_error=True propagates the first task exception.
        group = Group()
        for task in self._tasks:
            group.start(Greenlet(task))
        group.join(raise_error=True)
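
A quick self-contained demonstration of the `raise_error=True` behaviour this relies on: the first exception raised by any greenlet in the group is re-raised in the joining greenlet.

import gevent
from gevent import Greenlet
from gevent.pool import Group

def ok():
    gevent.sleep(0.1)

def boom():
    raise RuntimeError('task failed')

group = Group()
group.start(Greenlet(ok))
group.start(Greenlet(boom))
try:
    group.join(raise_error=True)
except RuntimeError as exc:
    print('propagated:', exc)   # -> propagated: task failed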
Example #9
class ImapSyncMonitor(BaseMailSyncMonitor):
    """
    Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    poll_frequency: Integer
        Seconds to wait between polls of the spawned folder sync greenlets.
    refresh_flags_max: Integer
        The maximum number of UIDs for which we'll periodically check
        flags.

    """
    def __init__(self, account,
                 heartbeat=1, refresh_frequency=30, poll_frequency=30,
                 retry_fail_classes=[], refresh_flags_max=2000):
        self.refresh_frequency = refresh_frequency
        self.poll_frequency = poll_frequency
        self.syncmanager_lock = BoundedSemaphore(1)
        self.refresh_flags_max = refresh_flags_max
        self.saved_remote_folders = None

        provider_supports_condstore = account.provider_info.get('condstore',
                                                                False)
        account_supports_condstore = getattr(account, 'supports_condstore',
                                             False)
        if provider_supports_condstore or account_supports_condstore:
            self.sync_engine_class = CondstoreFolderSyncEngine
        else:
            self.sync_engine_class = FolderSyncEngine

        self.folder_monitors = Group()

        BaseMailSyncMonitor.__init__(self, account, heartbeat,
                                     retry_fail_classes)

    @retry_crispin
    def prepare_sync(self):
        """
        Gets and saves Folder objects for folders on the IMAP backend. Returns a
        list of tuples (folder_name, folder_id) for each folder we want to sync
        (in order).
        """
        with mailsync_session_scope() as db_session:
            with connection_pool(self.account_id).get() as crispin_client:
                # Get a fresh list of the folder names from the remote
                remote_folders = crispin_client.folders()
                if self.saved_remote_folders != remote_folders:
                    self.save_folder_names(db_session, remote_folders)
                    self.saved_remote_folders = remote_folders
                # The folders we should be syncing
                sync_folders = crispin_client.sync_folders()

            sync_folder_names_ids = []
            for folder_name in sync_folders:
                try:
                    id_, = db_session.query(Folder.id). \
                        filter(Folder.name == folder_name,
                               Folder.account_id == self.account_id).one()
                    sync_folder_names_ids.append((folder_name, id_))
                except NoResultFound:
                    log.error('Missing Folder object when starting sync',
                              folder_name=folder_name)
                    raise MailsyncError("Missing Folder '{}' on account {}"
                                        .format(folder_name, self.account_id))
            return sync_folder_names_ids

    def save_folder_names(self, db_session, raw_folders):
        """
        Save the folders present on the remote backend for an account.

        * Create Folder objects.
        * Delete Folders that no longer exist on the remote.

        Notes
        -----
        Generic IMAP uses folders (not labels).
        Canonical folders ('inbox') and other folders alike are created as
        Folder objects.

        We don't canonicalize folder names to lowercase when saving because
        different backends may or may not be case-sensitive; code that
        references saved folder names should canonicalize as needed when
        doing comparisons.

        """
        account = db_session.query(Account).get(self.account_id)
        remote_folder_names = {f.display_name.rstrip()[:MAX_FOLDER_NAME_LENGTH]
                               for f in raw_folders}

        assert 'inbox' in {f.role for f in raw_folders}, \
            'Account {} has no detected inbox folder'.format(
                account.email_address)

        local_folders = {f.name: f for f in db_session.query(Folder).filter(
                         Folder.account_id == self.account_id)}

        # Delete folders no longer present on the remote.
        # Note that the folder with canonical_name='inbox' cannot be deleted;
        # remote_folder_names will always contain an entry corresponding to it.
        discard = set(local_folders) - remote_folder_names
        for name in discard:
            log.info('Folder deleted from remote', account_id=self.account_id,
                     name=name)
            db_session.delete(local_folders[name])
            del local_folders[name]

        # Create new folders
        for raw_folder in raw_folders:
            Folder.find_or_create(db_session, account, raw_folder.display_name,
                                  raw_folder.role)
        # Set the should_run bit for existing folders to True (it's True by
        # default for new ones.)
        for f in local_folders.values():
            if f.imapsyncstatus:
                f.imapsyncstatus.sync_should_run = True

        db_session.commit()

    def start_new_folder_sync_engines(self, folders=None):
        # Avoid a shared mutable default; `folders` is mutated in place below.
        if folders is None:
            folders = set()
        new_folders = [f for f in self.prepare_sync() if f not in folders]
        for folder_name, folder_id in new_folders:
            log.info('Folder sync engine started',
                     account_id=self.account_id,
                     folder_id=folder_id,
                     folder_name=folder_name)
            thread = self.sync_engine_class(self.account_id,
                                            folder_name,
                                            folder_id,
                                            self.email_address,
                                            self.provider_name,
                                            self.poll_frequency,
                                            self.syncmanager_lock,
                                            self.refresh_flags_max,
                                            self.retry_fail_classes)
            self.folder_monitors.start(thread)
            while not thread_polling(thread) and \
                    not thread_finished(thread) and \
                    not thread.ready():
                sleep(self.heartbeat)

            # allow individual folder sync monitors to shut themselves down
            # after completing the initial sync
            if thread_finished(thread) or thread.ready():
                if thread.exception:
                    # Exceptions causing the folder sync to exit should not
                    # clear the heartbeat.
                    log.info('Folder sync engine exited with error',
                             account_id=self.account_id,
                             folder_id=folder_id,
                             folder_name=folder_name,
                             error=thread.exception)
                else:
                    log.info('Folder sync engine finished',
                             account_id=self.account_id,
                             folder_id=folder_id,
                             folder_name=folder_name)
                    # clear the heartbeat for this folder-thread since it
                    # exited cleanly.
                    clear_heartbeat_status(self.account_id, folder_id)

                # note: thread is automatically removed from
                # self.folder_monitors
            else:
                folders.add((folder_name, folder_id))

    def start_delete_handler(self):
        self.delete_handler = DeleteHandler(account_id=self.account_id,
                                            namespace_id=self.namespace_id,
                                            uid_accessor=lambda m: m.imapuids)
        self.delete_handler.start()

    def sync(self):
        try:
            self.start_delete_handler()
            folders = set()
            self.start_new_folder_sync_engines(folders)
            while True:
                sleep(self.refresh_frequency)
                self.start_new_folder_sync_engines(folders)
        except ValidationError as exc:
            log.error(
                'Error authenticating; stopping sync', exc_info=True,
                account_id=self.account_id, logstash_tag='mark_invalid')
            with mailsync_session_scope() as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.mark_invalid()
                account.update_sync_error(str(exc))
Example #10
class ImapSyncMonitor(BaseMailSyncMonitor):
    """
    Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    (DEPRECATED) refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    syncback_frequency: Integer
        Seconds to wait between performing consecutive syncback iterations and
        checking for new folders to sync.

    """
    def __init__(self,
                 account,
                 heartbeat=1,
                 refresh_frequency=30,
                 syncback_frequency=5):
        # DEPRECATED.
        # TODO[k]: Remove after sync-syncback integration deploy is complete.
        self.refresh_frequency = refresh_frequency
        self.syncmanager_lock = BoundedSemaphore(1)
        self.saved_remote_folders = None
        self.sync_engine_class = FolderSyncEngine
        self.folder_monitors = Group()

        self.delete_handler = None

        self.syncback_handler = None
        self.folder_sync_signals = {}
        self.syncback_timestamp = None
        self.syncback_frequency = syncback_frequency

        BaseMailSyncMonitor.__init__(self, account, heartbeat)

    @retry_crispin
    def prepare_sync(self):
        """
        Gets and saves Folder objects for folders on the IMAP backend. Returns a
        list of folder names for the folders we want to sync (in order).

        """
        with connection_pool(self.account_id).get() as crispin_client:
            # Get a fresh list of the folder names from the remote
            remote_folders = crispin_client.folders()
            # The folders we should be syncing
            sync_folders = crispin_client.sync_folders()

        if self.saved_remote_folders != remote_folders:
            with session_scope(self.namespace_id) as db_session:
                self.save_folder_names(db_session, remote_folders)
                self.saved_remote_folders = remote_folders
        return sync_folders

    def save_folder_names(self, db_session, raw_folders):
        """
        Save the folders present on the remote backend for an account.

        * Create Folder objects.
        * Delete Folders that no longer exist on the remote.

        Notes
        -----
        Generic IMAP uses folders (not labels).
        Canonical folders ('inbox') and other folders alike are created as
        Folder objects.

        We don't canonicalize folder names to lowercase when saving because
        different backends may or may not be case-sensitive; code that
        references saved folder names should canonicalize as needed when
        doing comparisons.

        """
        account = db_session.query(Account).get(self.account_id)
        remote_folder_names = {
            f.display_name.rstrip()[:MAX_FOLDER_NAME_LENGTH]
            for f in raw_folders
        }

        assert 'inbox' in {f.role for f in raw_folders}, \
            'Account {} has no detected inbox folder'.format(
                account.email_address)

        local_folders = {
            f.name: f
            for f in db_session.query(Folder).filter(
                Folder.account_id == self.account_id)
        }

        # Delete folders no longer present on the remote.
        # Note that the folder with canonical_name='inbox' cannot be deleted;
        # remote_folder_names will always contain an entry corresponding to it.
        discard = set(local_folders) - remote_folder_names
        for name in discard:
            log.info('Folder deleted from remote',
                     account_id=self.account_id,
                     name=name)
            if local_folders[name].category_id is not None:
                cat = db_session.query(Category).get(
                    local_folders[name].category_id)
                if cat is not None:
                    db_session.delete(cat)
            del local_folders[name]

        # Create new folders
        for raw_folder in raw_folders:
            Folder.find_or_create(db_session, account, raw_folder.display_name,
                                  raw_folder.role)
        # Set the should_run bit for existing folders to True (it's True by
        # default for new ones.)
        for f in local_folders.values():
            if f.imapsyncstatus:
                f.imapsyncstatus.sync_should_run = True

        db_session.commit()

    def start_new_folder_sync_engines(self):
        running_monitors = {
            monitor.folder_name: monitor
            for monitor in self.folder_monitors
        }
        with session_scope(self.namespace_id) as db_session:
            account = db_session.query(Account).options(
                load_only('_sync_status')).get(self.account_id)
            s3_resync = account._sync_status.get('s3_resync', False)

        for folder_name in self.prepare_sync():
            if folder_name in running_monitors:
                thread = running_monitors[folder_name]
            else:
                log.info('Folder sync engine started',
                         account_id=self.account_id,
                         folder_name=folder_name)
                self._add_sync_signal(folder_name)
                thread = self.sync_engine_class(
                    self.account_id, self.namespace_id, folder_name,
                    self.email_address, self.provider_name,
                    self.syncmanager_lock,
                    self.folder_sync_signals[folder_name])
                self.folder_monitors.start(thread)

                if s3_resync:
                    log.info('Starting an S3 monitor',
                             account_id=self.account_id)
                    s3_thread = S3FolderSyncEngine(self.account_id,
                                                   self.namespace_id,
                                                   folder_name,
                                                   self.email_address,
                                                   self.provider_name,
                                                   self.syncmanager_lock, None)
                    self.folder_monitors.start(s3_thread)

            while thread.state != 'poll' and not thread.ready():
                sleep(self.heartbeat)
                self.perform_syncback()

            if thread.ready():
                self._remove_sync_signal(folder_name)
                log.info('Folder sync engine exited',
                         account_id=self.account_id,
                         folder_name=folder_name,
                         error=thread.exception)

    def start_delete_handler(self):
        if self.delete_handler is None:
            self.delete_handler = DeleteHandler(
                account_id=self.account_id,
                namespace_id=self.namespace_id,
                provider_name=self.provider_name,
                uid_accessor=lambda m: m.imapuids)
            self.delete_handler.start()

    def perform_syncback(self):
        """
        Perform syncback for the account.

        Syncback is performed iff all folder syncs are paused, and the previous
        syncback occurred more than syncback_frequency seconds ago.

        The first condition is checked by the call to _can_syncback().
        The second condition is needed because if there are a large number of
        pending actions during initial sync, it could repeatedly get interrupted
        and put on hold for seconds at a time.

        """
        from inbox.syncback.base import SyncbackHandler

        if not self._can_syncback():
            log.info('Skipping syncback', reason='folder syncs running')
            return

        if (self.syncback_timestamp
                and (datetime.utcnow() - self.syncback_timestamp).seconds <
                self.syncback_frequency):
            log.info('Skipping syncback',
                     reason='last syncback < syncback_frequency seconds ago',
                     syncback_frequency=self.syncback_frequency)
            # Reset here so syncs can proceed
            self._signal_syncs()
            return

        if self.syncback_handler is None:
            self.syncback_handler = SyncbackHandler(self.account_id,
                                                    self.namespace_id,
                                                    self.provider_name)
        try:
            interval = ((datetime.utcnow() - self.syncback_timestamp).seconds
                        if self.syncback_timestamp else None)
            log.info('Performing syncback',
                     syncback_interval_in_seconds=interval)
            self.syncback_handler.send_client_changes()
            self.syncback_timestamp = datetime.utcnow()
        except Exception:
            # Log, set self.folder_sync_signals and then re-raise (so the
            # greenlet can be restarted etc.)
            log.error('Critical syncback error', exc_info=True)
            raise
        finally:
            # Reset here so syncs can proceed
            self._signal_syncs()

    def sync(self):
        try:
            self.start_delete_handler()
            self.start_new_folder_sync_engines()
            while True:
                sleep(self.syncback_frequency)
                self.perform_syncback()
                self.start_new_folder_sync_engines()
        except ValidationError as exc:
            log.error('Error authenticating; stopping sync',
                      exc_info=True,
                      account_id=self.account_id,
                      logstash_tag='mark_invalid')
            with session_scope(self.namespace_id) as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.mark_invalid()
                account.update_sync_error(str(exc))

    def _add_sync_signal(self, folder_name):
        self.folder_sync_signals[folder_name] = Event()
        self.folder_sync_signals[folder_name].set()

    def _remove_sync_signal(self, folder_name):
        del self.folder_sync_signals[folder_name]

    def _can_syncback(self):
        """
        Determine if syncback can occur.

        If all folder syncs are paused as indicated by the folder_sync_signals,
        returns True. Else, returns False.

        """
        return (not self.folder_sync_signals
                or all(not signal.is_set()
                       for signal in self.folder_sync_signals.values()))

    def _signal_syncs(self):
        """ Indicate that folder syncs can resume. """
        for signal in self.folder_sync_signals.values():
            signal.set()
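
The signalling protocol implied above (a set Event means "sync may run"; a folder sync clears its Event when it pauses, and _signal_syncs re-sets the Events so syncs resume) can be sketched in isolation. The pause points below are an assumption about what the folder sync engines do with their signal:

import gevent
from gevent.event import Event

signal = Event()
signal.set()                 # set == "sync may run"

def folder_sync():
    for step in range(3):
        print('syncing chunk', step)
        gevent.sleep(0.2)
        signal.clear()       # pause point: tell the monitor we're idle
        signal.wait()        # resume only once the monitor re-sets the event

worker = gevent.spawn(folder_sync)
while not worker.ready():
    gevent.sleep(0.25)
    if not signal.is_set():  # all syncs paused: safe to run syncback
        print('performing syncback')
        signal.set()         # let the sync resume
worker.join()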
Example #11
class Consumer(object):
    #: The debug flag
    #:
    #: This attribute can also be configured from the config with the ``DEBUG``
    #: configuration key.  Defaults to ``False``.
    debug: ConfigAttribute = ConfigAttribute('DEBUG')
    #: A :class:`~datetime.timedelta` which is used as
    #: shard iterator interval.
    #:
    #: This attribute can also be configured from the config with
    #: ``SHARD_ITERATOR_INTERVAL`` configuration key.  Defaults to
    #: ``timedelta(seconds=1)``
    shard_iterator_interval: ConfigAttribute = ConfigAttribute(
        'SHARD_ITERATOR_INTERVAL',
        get_converter=_make_timedelta
    )
    #: A :class:`~datetime.timedelta` which is used as
    #: shard monitoring interval.
    #:
    #: This attribute can also be configured from the config with
    #: ``SHARD_MONITORING_INTERVAL`` configuration key.  Defaults to
    #: ``timedelta(hours=1)``
    shard_monitoring_interval: ConfigAttribute = ConfigAttribute(
        'SHARD_MONITORING_INTERVAL',
        get_converter=_make_timedelta
    )
    #: A :class:`~datetime.timedelta` which is used as overhang interval.
    #:
    #: This attribute can also be configured from the config with
    #: ``PROTRACTOR_OVERHANG_INTERVAL`` configuration key.  Defaults to
    #: ``timedelta(seconds=30)``
    protractor_overhang_interval: ConfigAttribute = ConfigAttribute(
        'PROTRACTOR_OVERHANG_INTERVAL',
        get_converter=_make_timedelta
    )
    #: Default configuration parameters.
    __default_config: ImmutableDict = ImmutableDict({
        'DEBUG': False,
        'STREAM_REGION': 'ap-south-1',
        'STREAM_NAME': None,
        'SHARD_ITERATOR_TYPE': 'TRIM_HORIZON',
        'SHARD_READ_LIMIT': 50,
        'SHARD_ITERATOR_INTERVAL': timedelta(seconds=1),
        'SHARD_MONITORING_INTERVAL': timedelta(hours=1),
        'PROTRACTOR_ENABLE': False,
        'PROTRACTOR_OVERHANG_INTERVAL': timedelta(seconds=30),
        'LOGGER_HANDLER_POLICY': 'always',
        'LOG_ROLLOVER': 'd',
        'LOG_INTERVAL': 1,
        'LOG_BACKUP_COUNT': 2,
        'BUCKET_SIZE_LIMIT': 10000,
        'BUCKET_COUNT_LIMIT': 120,
    })
    #: The name of the package or module that this consumer belongs to.
    #: Do not change this once it is set by the constructor.
    import_name: str = None
    #: Absolute path to the package on the filesystem.
    root_path: str = None

    def __init__(self,
                 import_name: str,
                 root_path: str = None,
                 stream_region: str = None,
                 stream_name: str = None,
                 log_folder: str = 'log',
                 checkpointer: Checkpointer = None) -> None:
        self.import_name = import_name
        if root_path is None:
            root_path = _get_root_path(import_name)
        self.root_path = root_path
        self.log_folder = log_folder

        #: The configuration directory as :class:`Config`.
        self.config = Config(self.root_path, self.__default_config)
        if stream_region is not None:
            self.config['STREAM_REGION'] = stream_region
        if stream_name is not None:
            self.config['STREAM_NAME'] = stream_name

        #: The checkpointer for this consumer; defaults to an
        #: InMemoryCheckpointer when none is given.
        self.checkpointer = checkpointer
        if self.checkpointer is None:
            self.checkpointer = InMemoryCheckpointer()

        #: A list of functions that will be called when the bucket is full.
        self.__transform_funcs = []
        #: A list of functions that should be called after transform.
        self.__after_consume_func = []
        #: A list of functions that are called when the consumer context
        #: is destroyed.
        self.__teardown_consumer_func = []

        #: The Group of greenlets running shard consumers and monitors.
        self.__threads = Group()
        self.shards = set()

    @locked_cached_property
    def name(self) -> str:
        if self.import_name == '__main__':
            fn = getattr(sys.modules['__main__'], '__file__', None)
            if fn is None:
                return '__main__'
            return os.path.splitext(os.path.basename(fn))[0]
        return self.import_name

    @locked_cached_property
    def logger(self) -> _Logger:
        return create_logger(self)

    @locked_cached_property
    def kinesis_client(self):
        return boto3.client('kinesis',
                            region_name=self.config['STREAM_REGION'])

    @typechecked
    def transform(self, func: Callable[[List[Any],
                                        str,
                                        str,
                                        datetime],
                                       List[Any]]) -> Callable:
        self.__transform_funcs.append(func)
        return func

    @typechecked
    def after_consume(self, func: Callable[[Optional[List[Any]],
                                            str,
                                            Optional[str],
                                            Optional[datetime]],
                                           None]) -> Callable:
        self.__after_consume_func.append(func)
        return func

    @typechecked
    def teardown_consumer(self, func: Callable[[Any], None]) -> Callable:
        self.__teardown_consumer_func.append(func)
        return func

    @typechecked
    def do_transform(self,
                     data: List[Any],
                     shard_id: str,
                     last_sequence_number: str,
                     last_arrival_timestamp: datetime) -> List[Any]:
        for func in reversed(self.__transform_funcs):
            data = func(
                data,
                shard_id,
                last_sequence_number,
                last_arrival_timestamp
            )
        return data

    @typechecked
    def do_after_consume(self,
                         data: Optional[List[Any]],
                         shard_id: str,
                         last_sequence_number: Optional[str],
                         last_arrival_timestamp: Optional[datetime]) -> None:
        for func in reversed(self.__after_consume_func):
            func(
                data,
                shard_id,
                last_sequence_number,
                last_arrival_timestamp
            )

    @typechecked
    def do_teardown_consumer(self, exc=_sentinel) -> None:
        if exc is _sentinel:
            exc = sys.exc_info()[1]
        for func in reversed(self.__teardown_consumer_func):
            func(exc)

    def handle_shard_exception(self, e) -> None:
        exc_type, exc_value, tb = sys.exc_info()
        assert exc_value is e
        self.log_exception((exc_type, exc_value, tb))

    def handle_exception(self, e) -> None:
        exc_type, exc_value, tb = sys.exc_info()
        self.log_exception((exc_type, exc_value, tb))

    def log_exception(self, exc_info) -> None:
        if has_shard_context():
            self.logger.error(
                'Exception on {0}'.format(current_shard.id),
                exc_info=exc_info
            )
        else:
            self.logger.error(
                'Exception', exc_info=exc_info
            )

    def get_context(self) -> ConsumerContext:
        return ConsumerContext(self)

    def get_stream(self) -> KinesisStream:
        return KinesisStream(self.kinesis_client.describe_stream(
            StreamName=self.config['STREAM_NAME']
        ))

    def dispatch(self) -> None:
        stream = self.get_stream()
        if stream.status == 'ACTIVE':
            gevent.signal(signal.SIGQUIT, gevent.killall)
            shards = stream.get_shards(self)
            for shard in shards:
                self.spawn_shard(shard)
            self.__threads.start(ShardMonitor(self))
            self.__threads.join()
        else:
            sys.exit()

    def spawn_shard(self, shard: KinesisShard) -> None:
        self.__threads.start(shard)
        self.shards.add(shard)

    def close_shard(self, shard: KinesisShard) -> None:
        self.logger.warning('Stream \'{0}\' Shard \'{1}\' closed'.format(
            self.config['STREAM_NAME'], shard.id
        ))
        self.shards.remove(shard)

    def process(self, debug=None) -> None:
        if debug is not None:
            self.debug = bool(debug)
        ctx = self.get_context()
        error = None
        try:
            try:
                ctx.push()
                self.dispatch()
            except Exception as e:
                error = e
                self.handle_exception(e)
        finally:
            ctx.pop(error)

    def __repr__(self) -> str:
        return '<{0!s} {1!r} - \'{2!s}\'>'.format(
            self.__class__.__name__,
            self.name,
            self.config['STREAM_NAME']
        )
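
Wiring up the decorators defined above might look like the following sketch; the stream name and record handling are assumptions for illustration:

consumer = Consumer(__name__, stream_name='example-stream')

@consumer.transform
def drop_empty(data, shard_id, last_sequence_number, last_arrival_timestamp):
    # Transform functions return the (possibly filtered) record list.
    return [record for record in data if record]

@consumer.after_consume
def report(data, shard_id, last_sequence_number, last_arrival_timestamp):
    print(shard_id, last_sequence_number)

# consumer.process() pushes a ConsumerContext and dispatches shard greenlets.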
Example #12
    def start(self, greenlet=None):
        """Start the greenlet pool or add a greenlet to the pool."""
        if greenlet is not None:
            return Group.start(self, greenlet)
Example #13
class ImapSyncMonitor(BaseMailSyncMonitor):
    """
    Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    poll_frequency: Integer
        Seconds to wait between polls of the spawned folder sync greenlets.
    refresh_flags_max: Integer
        The maximum number of UIDs for which we'll periodically check
        flags.

    """
    def __init__(self,
                 account,
                 heartbeat=1,
                 refresh_frequency=30,
                 poll_frequency=30,
                 retry_fail_classes=[],
                 refresh_flags_max=2000):
        self.refresh_frequency = refresh_frequency
        self.poll_frequency = poll_frequency
        self.syncmanager_lock = BoundedSemaphore(1)
        self.refresh_flags_max = refresh_flags_max

        provider_supports_condstore = account.provider_info.get(
            'condstore', False)
        account_supports_condstore = getattr(account, 'supports_condstore',
                                             False)
        if provider_supports_condstore or account_supports_condstore:
            self.sync_engine_class = CondstoreFolderSyncEngine
        else:
            self.sync_engine_class = FolderSyncEngine

        self.folder_monitors = Group()

        BaseMailSyncMonitor.__init__(self, account, heartbeat,
                                     retry_fail_classes)

    @retry_crispin
    def prepare_sync(self):
        """Ensures that canonical tags are created for the account, and gets
        and saves Folder objects for folders on the IMAP backend. Returns a list
        of tuples (folder_name, folder_id) for each folder we want to sync (in
        order)."""
        with mailsync_session_scope() as db_session:
            with _pool(self.account_id).get() as crispin_client:
                sync_folders = crispin_client.sync_folders()
                save_folder_names(log, self.account_id,
                                  crispin_client.folder_names(), db_session)

            sync_folder_names_ids = []
            for folder_name in sync_folders:
                try:
                    id_, = db_session.query(Folder.id). \
                        filter(Folder.name == folder_name,
                               Folder.account_id == self.account_id).one()
                    sync_folder_names_ids.append((folder_name, id_))
                except NoResultFound:
                    log.error("Missing Folder object when starting sync",
                              folder_name=folder_name)
                    raise MailsyncError(
                        "Missing Folder '{}' on account {}".format(
                            folder_name, self.account_id))
            return sync_folder_names_ids

    def start_new_folder_sync_engines(self, folders=None):
        # Avoid a shared mutable default; `folders` is mutated in place below.
        if folders is None:
            folders = set()
        new_folders = [f for f in self.prepare_sync() if f not in folders]
        for folder_name, folder_id in new_folders:
            log.info('Folder sync engine started',
                     account_id=self.account_id,
                     folder_id=folder_id,
                     folder_name=folder_name)
            thread = self.sync_engine_class(
                self.account_id, folder_name, folder_id, self.email_address,
                self.provider_name, self.poll_frequency, self.syncmanager_lock,
                self.refresh_flags_max, self.retry_fail_classes)
            self.folder_monitors.start(thread)
            while not thread_polling(thread) and \
                    not thread_finished(thread) and \
                    not thread.ready():
                sleep(self.heartbeat)

            # allow individual folder sync monitors to shut themselves down
            # after completing the initial sync
            if thread_finished(thread) or thread.ready():
                log.info('Folder sync engine finished/killed',
                         account_id=self.account_id,
                         folder_id=folder_id,
                         folder_name=folder_name)
                # note: thread is automatically removed from
                # self.folder_monitors
            else:
                folders.add((folder_name, folder_id))

    def start_delete_handler(self):
        self.delete_handler = DeleteHandler(account_id=self.account_id,
                                            namespace_id=self.namespace_id,
                                            uid_accessor=lambda m: m.imapuids)
        self.delete_handler.start()

    def sync(self):
        self.start_delete_handler()
        folders = set()
        self.start_new_folder_sync_engines(folders)
        while True:
            sleep(self.refresh_frequency)
            self.start_new_folder_sync_engines(folders)
Example #14
from gevent.pool import Group

# SubmitQueue and SubmitWorker are project-specific imports.


def main(workers):
    queue = SubmitQueue()
    worker_group = Group()
    for _ in range(workers):
        worker_group.start(SubmitWorker(queue))
    worker_group.join()
Example #15
class ImapSyncMonitor(BaseMailSyncMonitor):
    """
    Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    """

    def __init__(self, account,
                 heartbeat=1, refresh_frequency=30):
        self.refresh_frequency = refresh_frequency
        self.syncmanager_lock = BoundedSemaphore(1)
        self.saved_remote_folders = None
        self.sync_engine_class = FolderSyncEngine

        self.folder_monitors = Group()
        self.delete_handler = None

        BaseMailSyncMonitor.__init__(self, account, heartbeat)

    @retry_crispin
    def prepare_sync(self):
        """
        Gets and saves Folder objects for folders on the IMAP backend. Returns a
        list of folder names for the folders we want to sync (in order).
        """
        with connection_pool(self.account_id).get() as crispin_client:
            # Get a fresh list of the folder names from the remote
            remote_folders = crispin_client.folders()
            # The folders we should be syncing
            sync_folders = crispin_client.sync_folders()

        if self.saved_remote_folders != remote_folders:
            with session_scope(self.namespace_id) as db_session:
                self.save_folder_names(db_session, remote_folders)
                self.saved_remote_folders = remote_folders

        return sync_folders

    def save_folder_names(self, db_session, raw_folders):
        """
        Save the folders present on the remote backend for an account.

        * Create Folder objects.
        * Delete Folders that no longer exist on the remote.

        Notes
        -----
        Generic IMAP uses folders (not labels).
        Canonical folders ('inbox') and other folders alike are created as
        Folder objects.

        We don't canonicalize folder names to lowercase when saving because
        different backends may or may not be case-sensitive; code that
        references saved folder names should canonicalize as needed when
        doing comparisons.

        """
        account = db_session.query(Account).get(self.account_id)
        remote_folder_names = {f.display_name.rstrip()[:MAX_FOLDER_NAME_LENGTH]
                               for f in raw_folders}

        assert 'inbox' in {f.role for f in raw_folders}, \
            'Account {} has no detected inbox folder'.format(
                account.email_address)

        local_folders = {f.name: f for f in db_session.query(Folder).filter(
                         Folder.account_id == self.account_id)}

        # Delete folders no longer present on the remote.
        # Note that the folder with canonical_name='inbox' cannot be deleted;
        # remote_folder_names will always contain an entry corresponding to it.
        discard = set(local_folders) - remote_folder_names
        for name in discard:
            log.info('Folder deleted from remote', account_id=self.account_id,
                     name=name)
            cat = db_session.query(Category).get(
                local_folders[name].category_id)
            if cat is not None:
                db_session.delete(cat)
            del local_folders[name]

        # Create new folders
        for raw_folder in raw_folders:
            Folder.find_or_create(db_session, account, raw_folder.display_name,
                                  raw_folder.role)
        # Set the should_run bit for existing folders to True (it's True by
        # default for new ones.)
        for f in local_folders.values():
            if f.imapsyncstatus:
                f.imapsyncstatus.sync_should_run = True

        db_session.commit()

    def start_new_folder_sync_engines(self):
        running_monitors = {monitor.folder_name: monitor for monitor in
                            self.folder_monitors}
        for folder_name in self.prepare_sync():
            if folder_name in running_monitors:
                thread = running_monitors[folder_name]
            else:
                log.info('Folder sync engine started',
                         account_id=self.account_id,
                         folder_name=folder_name)
                thread = self.sync_engine_class(self.account_id,
                                                self.namespace_id,
                                                folder_name,
                                                self.email_address,
                                                self.provider_name,
                                                self.syncmanager_lock)
                self.folder_monitors.start(thread)
            while not thread_polling(thread) and not thread.ready():
                sleep(self.heartbeat)

            if thread.ready():
                log.info('Folder sync engine exited',
                         account_id=self.account_id,
                         folder_name=folder_name,
                         error=thread.exception)

    def start_delete_handler(self):
        if self.delete_handler is None:
            self.delete_handler = DeleteHandler(
                account_id=self.account_id,
                namespace_id=self.namespace_id,
                provider_name=self.provider_name,
                uid_accessor=lambda m: m.imapuids)
            self.delete_handler.start()

    def sync(self):
        try:
            self.start_delete_handler()
            self.start_new_folder_sync_engines()
            while True:
                sleep(self.refresh_frequency)
                self.start_new_folder_sync_engines()
        except ValidationError as exc:
            log.error(
                'Error authenticating; stopping sync', exc_info=True,
                account_id=self.account_id, logstash_tag='mark_invalid')
            with session_scope(self.namespace_id) as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.mark_invalid()
                account.update_sync_error(str(exc))
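The start-and-wait loop in start_new_folder_sync_engines() above is a reusable gevent pattern: start each engine in a Group, then block until it either reaches its polling state or dies. Below is a minimal runnable sketch of just that pattern, with a hypothetical Worker greenlet and a plain state attribute standing in for the folder sync engine and thread_polling():

from gevent import Greenlet, sleep
from gevent.pool import Group

class Worker(Greenlet):
    """Hypothetical stand-in for a folder sync engine."""
    def __init__(self, folder_name):
        super(Worker, self).__init__()
        self.folder_name = folder_name
        self.state = 'initial'

    def _run(self):
        sleep(0.1)           # simulate the initial sync
        self.state = 'poll'  # then settle into polling
        sleep(0.5)

monitors = Group()
running = {}
for name in ('inbox', 'archive'):
    thread = running.get(name)
    if thread is None:
        thread = Worker(name)
        monitors.start(thread)
        running[name] = thread
    # Block until the worker is polling or has exited, as sync() does above.
    while thread.state != 'poll' and not thread.ready():
        sleep(0.05)
    if thread.ready():
        print('%s exited with %r' % (name, thread.exception))

monitors.join()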
Example #17
class ImapSyncMonitor(BaseMailSyncMonitor):
    """
    Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    poll_frequency: Integer
        Seconds to wait between polling iterations of the spawned greenlets.
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    refresh_flags_max: Integer
        The maximum number of UIDs for which we'll check flags
        periodically.

    """
    def __init__(self, account, heartbeat=1, poll_frequency=30,
                 retry_fail_classes=[], refresh_flags_max=2000):
        self.poll_frequency = poll_frequency
        self.syncmanager_lock = db_write_lock(account.namespace.id)
        self.refresh_flags_max = refresh_flags_max

        provider_supports_condstore = account.provider_info.get('condstore',
                                                                False)
        account_supports_condstore = getattr(account, 'supports_condstore',
                                             False)
        if provider_supports_condstore or account_supports_condstore:
            self.sync_engine_class = CondstoreFolderSyncEngine
        else:
            self.sync_engine_class = FolderSyncEngine

        self.folder_monitors = Group()

        self.sync_status_queue = Queue()
        self.folder_monitors.start(Greenlet(self.sync_status_consumer))

        BaseMailSyncMonitor.__init__(self, account, heartbeat,
                                     retry_fail_classes)

    def prepare_sync(self):
        """Ensures that canonical tags are created for the account, and gets
        and save Folder objects for folders on the IMAP backend. Returns a list
        of tuples (folder_name, folder_id) for each folder we want to sync (in
        order)."""
        with mailsync_session_scope() as db_session:
            with _pool(self.account_id).get() as crispin_client:
                sync_folders = crispin_client.sync_folders()
                save_folder_names(log, self.account_id,
                                  crispin_client.folder_names(), db_session)

            sync_folder_names_ids = []
            for folder_name in sync_folders:
                try:
                    id_, = db_session.query(Folder.id). \
                        filter(Folder.name == folder_name,
                               Folder.account_id == self.account_id).one()
                    sync_folder_names_ids.append((folder_name, id_))
                except NoResultFound:
                    log.error("Missing Folder object when starting sync",
                              folder_name=folder_name)
                    raise MailsyncError("Missing Folder '{}' on account {}"
                                        .format(folder_name, self.account_id))
            return sync_folder_names_ids

    def sync(self):
        """ Start per-folder syncs. Only have one per-folder sync in the
            'initial' state at a time.
        """
        sync_folder_names_ids = self.prepare_sync()
        for folder_name, folder_id in sync_folder_names_ids:
            log.info('initializing folder sync')
            thread = self.sync_engine_class(self.account_id,
                                            folder_name,
                                            folder_id,
                                            self.email_address,
                                            self.provider_name,
                                            self.poll_frequency,
                                            self.syncmanager_lock,
                                            self.refresh_flags_max,
                                            self.retry_fail_classes,
                                            self.sync_status_queue)
            thread.start()
            self.folder_monitors.add(thread)
            while not thread_polling(thread) and \
                    not thread_finished(thread) and \
                    not thread.ready():
                sleep(self.heartbeat)

            # Allow individual folder sync monitors to shut themselves down
            # after completing the initial sync.
            if thread_finished(thread) or thread.ready():
                log.info('folder sync finished/killed',
                         folder_name=thread.folder_name)
                # NOTE: Greenlet is automatically removed from the group.

        self.folder_monitors.join()

    def sync_status_consumer(self):
        """Consume per-monitor sync status queue and update the
        ImapFolderSyncStatus table accordingly.
        Nothing fancy is happening as of now but here we may implement some
        batching to reduce the stress of the database."""
        while True:
            folder_id, state = self.sync_status_queue.get()
            with mailsync_session_scope() as db_session:
                sync_status_entry = db_session.query(ImapFolderSyncStatus)\
                    .filter_by(account_id=self.account_id, folder_id=folder_id)\
                    .options(load_only(ImapFolderSyncStatus.state)).one()
                sync_status_entry.state = state
                db_session.add(sync_status_entry)
                db_session.commit()
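The sync_status_queue consumed by sync_status_consumer() above is a standard gevent producer/consumer arrangement. Here is a minimal sketch of the same shape, with a print standing in for the ImapFolderSyncStatus update:

import gevent
from gevent.pool import Group
from gevent.queue import Queue

status_queue = Queue()

def consume_status():
    # get() blocks until a (folder_id, state) update arrives.
    while True:
        folder_id, state = status_queue.get()
        print('folder %d -> %s' % (folder_id, state))  # stand-in for the DB write

group = Group()
group.start(gevent.Greenlet(consume_status))

for folder_id, state in [(1, 'initial'), (1, 'poll'), (2, 'initial')]:
    status_queue.put((folder_id, state))

gevent.sleep(0)  # yield to let the consumer drain the queue
group.kill()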
Example #18
class Server(gevent.Greenlet):
    def __init__(self, config, context=None, quiet=False):
        super(Server, self).__init__()
        self.config = config
        self.context = context or zmq.Context.instance()
        self.quiet = quiet

        # dict of the zeromq sockets we use
        self.sockets = {}

        _collect = self.context.socket(zmq.SUB)
        _collect.setsockopt_string(zmq.SUBSCRIBE, '')
        _collect.bind(zerolog.get_endpoint(self.config['endpoints']['collect']))
        self.sockets['collect'] = _collect

        _publish = self.context.socket(zmq.XPUB)
        _publish.hwm = 100000
        _publish.linger = 1000
        _publish.setsockopt(zmq.XPUB_VERBOSE, 1)
        _publish.bind(zerolog.get_endpoint(self.config['endpoints']['publish']))
        self.sockets['publish'] = _publish

        _control = self.context.socket(zmq.ROUTER)
        _control.linger = 0
        _control.bind(zerolog.get_endpoint(self.config['endpoints']['control']))
        self.sockets['control'] = _control

        self.manager = ConfigManager(self.sockets['publish'], self.config)
        self.controller = Controller(self.sockets['control'], self.manager)
        self.dispatcher = Dispatcher(self.sockets['collect'], self.sockets['publish'], quiet=self.quiet)

        self.greenlets = Group()
        self.log = logging.getLogger('zerolog')
        self._keep_going = True

    def _run(self):
        self.greenlets.start(self.manager)
        self.greenlets.start(self.controller)
        self.greenlets.start(self.dispatcher)
        #self.greenlets.add(gevent.spawn(self.__client_emulator))
        self.greenlets.join()

    def kill(self, exception=gevent.GreenletExit, **kwargs):
        self._keep_going = False
        self.greenlets.kill()
        for _socket in self.sockets.values():
            _socket.close()
        super(Server, self).kill(exception=exception, **kwargs)

    def __client_emulator(self):
        """Emulate a tool/sysadmin changing log levels.
        """
        levels = 'critical error warning info debug'.split()
        import random
        while self._keep_going:
            loggers = list(self.manager.subscribed_loggers)
            self.log.info('subscribed loggers: {0}'.format(loggers))
            if loggers:
                logger_name = random.choice(list(loggers))
                self.manager.update(logger_name, {
                    'level': random.choice(levels),
                    'propagate': random.choice([0, 1]),
                })
                self.manager.configure(logger_name)
            gevent.sleep(5)
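For a sense of the other side of the wire, here is a minimal sketch of a log-tailing client for this server. It assumes the same zerolog.get_endpoint() helper and config['endpoints'] layout used above; it connects a SUB socket to the publish endpoint and prints every message the Dispatcher forwards:

import zmq
import zerolog  # assumed importable here, as in the server above

def tail_logs(config):
    context = zmq.Context.instance()
    sub = context.socket(zmq.SUB)
    sub.setsockopt_string(zmq.SUBSCRIBE, '')  # subscribe to every topic
    sub.connect(zerolog.get_endpoint(config['endpoints']['publish']))
    try:
        while True:
            # recv_multipart() returns all frames, with or without a
            # separate topic frame.
            print(sub.recv_multipart())
    finally:
        sub.close()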
Example #19
class ImapSyncMonitor(BaseMailSyncMonitor):
    """
    Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    poll_frequency: Integer
        Seconds to wait between polling iterations of the spawned greenlets.
    refresh_flags_max: Integer
        The maximum number of UIDs for which we'll check flags
        periodically.

    """
    def __init__(self, account,
                 heartbeat=1, refresh_frequency=30, poll_frequency=30,
                 retry_fail_classes=[], refresh_flags_max=2000):
        self.refresh_frequency = refresh_frequency
        self.poll_frequency = poll_frequency
        self.syncmanager_lock = BoundedSemaphore(1)
        self.refresh_flags_max = refresh_flags_max

        provider_supports_condstore = account.provider_info.get('condstore',
                                                                False)
        account_supports_condstore = getattr(account, 'supports_condstore',
                                             False)
        if provider_supports_condstore or account_supports_condstore:
            self.sync_engine_class = CondstoreFolderSyncEngine
        else:
            self.sync_engine_class = FolderSyncEngine

        self.folder_monitors = Group()

        BaseMailSyncMonitor.__init__(self, account, heartbeat,
                                     retry_fail_classes)

    @retry_crispin
    def prepare_sync(self):
        """Ensures that canonical tags are created for the account, and gets
        and save Folder objects for folders on the IMAP backend. Returns a list
        of tuples (folder_name, folder_id) for each folder we want to sync (in
        order)."""
        with mailsync_session_scope() as db_session:
            with _pool(self.account_id).get() as crispin_client:
                sync_folders = crispin_client.sync_folders()
                save_folder_names(log, self.account_id,
                                  crispin_client.folder_names(), db_session)

            sync_folder_names_ids = []
            for folder_name in sync_folders:
                try:
                    id_, = db_session.query(Folder.id). \
                        filter(Folder.name == folder_name,
                               Folder.account_id == self.account_id).one()
                    sync_folder_names_ids.append((folder_name, id_))
                except NoResultFound:
                    log.error("Missing Folder object when starting sync",
                              folder_name=folder_name)
                    raise MailsyncError("Missing Folder '{}' on account {}"
                                        .format(folder_name, self.account_id))
            return sync_folder_names_ids

    def start_new_folder_sync_engines(self, folders=None):
        # Use None instead of a mutable default: a folders=set() default
        # would be evaluated once and shared across calls (see the sketch
        # after this example).
        if folders is None:
            folders = set()
        new_folders = [f for f in self.prepare_sync() if f not in folders]
        for folder_name, folder_id in new_folders:
            log.info('Folder sync engine started',
                     account_id=self.account_id,
                     folder_id=folder_id,
                     folder_name=folder_name)
            thread = self.sync_engine_class(self.account_id,
                                            folder_name,
                                            folder_id,
                                            self.email_address,
                                            self.provider_name,
                                            self.poll_frequency,
                                            self.syncmanager_lock,
                                            self.refresh_flags_max,
                                            self.retry_fail_classes)
            self.folder_monitors.start(thread)
            while not thread_polling(thread) and \
                    not thread_finished(thread) and \
                    not thread.ready():
                sleep(self.heartbeat)

            # allow individual folder sync monitors to shut themselves down
            # after completing the initial sync
            if thread_finished(thread) or thread.ready():
                log.info('Folder sync engine finished/killed',
                         account_id=self.account_id,
                         folder_id=folder_id,
                         folder_name=folder_name)
                # note: thread is automatically removed from
                # self.folder_monitors
            else:
                folders.add((folder_name, folder_id))

    def start_delete_handler(self):
        self.delete_handler = DeleteHandler(account_id=self.account_id,
                                            namespace_id=self.namespace_id,
                                            uid_accessor=lambda m: m.imapuids)
        self.delete_handler.start()

    def sync(self):
        self.start_delete_handler()
        folders = set()
        self.start_new_folder_sync_engines(folders)
        while True:
            sleep(self.refresh_frequency)
            self.start_new_folder_sync_engines(folders)
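A note on the folders=None handling above: Python evaluates default argument values once, at function definition time, so a mutable default like folders=set() would be shared by every call that omits the argument. A quick self-contained demonstration of the pitfall and the usual fix:

def remember(item, seen=set()):  # risky: one set shared across calls
    seen.add(item)
    return seen

print(remember('a'))  # contains 'a'
print(remember('b'))  # contains 'a' AND 'b': state leaked across calls

def remember_safely(item, seen=None):
    if seen is None:  # a fresh set on every call
        seen = set()
    seen.add(item)
    return seen

print(remember_safely('a'))  # contains only 'a'
print(remember_safely('b'))  # contains only 'b'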
Example #20
class ImapSyncMonitor(BaseMailSyncMonitor):
    """
    Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    (DEPRECATED) refresh_frequency: Integer
        Seconds to wait between checking for new folders to sync.
    syncback_frequency: Integer
        Seconds to wait between performing consecutive syncback iterations and
        checking for new folders to sync.

    """
    def __init__(self, account, heartbeat=1, refresh_frequency=30,
                 syncback_frequency=5):
        # DEPRECATED.
        # TODO[k]: Remove after sync-syncback integration deploy is complete.
        self.refresh_frequency = refresh_frequency
        self.syncmanager_lock = BoundedSemaphore(1)
        self.saved_remote_folders = None
        self.sync_engine_class = FolderSyncEngine
        self.folder_monitors = Group()

        self.delete_handler = None

        self.syncback_handler = None
        self.folder_sync_signals = {}
        self.syncback_timestamp = None
        self.syncback_frequency = syncback_frequency

        BaseMailSyncMonitor.__init__(self, account, heartbeat)

    @retry_crispin
    def prepare_sync(self):
        """
        Gets and saves Folder objects for folders on the IMAP backend. Returns
        a list of folder names for the folders we want to sync (in order).

        """
        with connection_pool(self.account_id).get() as crispin_client:
            # Get a fresh list of the folder names from the remote
            remote_folders = crispin_client.folders()
            # The folders we should be syncing
            sync_folders = crispin_client.sync_folders()

        if self.saved_remote_folders != remote_folders:
            with session_scope(self.namespace_id) as db_session:
                self.save_folder_names(db_session, remote_folders)
                self.saved_remote_folders = remote_folders
        return sync_folders

    def save_folder_names(self, db_session, raw_folders):
        """
        Save the folders present on the remote backend for an account.

        * Create Folder objects.
        * Delete Folders that no longer exist on the remote.

        Notes
        -----
        Generic IMAP uses folders (not labels).
        Canonical folders ('inbox') and other folders alike are created as
        Folder objects.

        We don't canonicalize folder names to lowercase when saving, because
        different backends may or may not be case-sensitive; code that
        references saved folder names should canonicalize as needed when
        doing comparisons.

        """
        account = db_session.query(Account).get(self.account_id)
        remote_folder_names = {f.display_name.rstrip()[:MAX_FOLDER_NAME_LENGTH]
                               for f in raw_folders}

        assert 'inbox' in {f.role for f in raw_folders},\
            'Account {} has no detected inbox folder'.\
            format(account.email_address)

        local_folders = {f.name: f for f in db_session.query(Folder).filter(
                         Folder.account_id == self.account_id)}

        # Delete folders no longer present on the remote.
        # Note that the folder with canonical_name='inbox' cannot be deleted;
        # remote_folder_names will always contain an entry corresponding to it.
        discard = set(local_folders) - remote_folder_names
        for name in discard:
            log.info('Folder deleted from remote', account_id=self.account_id,
                     name=name)
            if local_folders[name].category_id is not None:
                cat = db_session.query(Category).get(
                    local_folders[name].category_id)
                if cat is not None:
                    db_session.delete(cat)
            del local_folders[name]

        # Create new folders
        for raw_folder in raw_folders:
            Folder.find_or_create(db_session, account, raw_folder.display_name,
                                  raw_folder.role)
        # Set the should_run bit for existing folders to True (it's True by
        # default for new ones).
        for f in local_folders.values():
            if f.imapsyncstatus:
                f.imapsyncstatus.sync_should_run = True

        db_session.commit()

    def start_new_folder_sync_engines(self):
        running_monitors = {monitor.folder_name: monitor for monitor in
                            self.folder_monitors}
        with session_scope(self.namespace_id) as db_session:
            account = db_session.query(Account).options(
                load_only('_sync_status')).get(self.account_id)
            s3_resync = account._sync_status.get('s3_resync', False)

        for folder_name in self.prepare_sync():
            if folder_name in running_monitors:
                thread = running_monitors[folder_name]
            else:
                log.info('Folder sync engine started',
                         account_id=self.account_id, folder_name=folder_name)
                self._add_sync_signal(folder_name)
                thread = self.sync_engine_class(self.account_id,
                                                self.namespace_id,
                                                folder_name,
                                                self.email_address,
                                                self.provider_name,
                                                self.syncmanager_lock,
                                                self.folder_sync_signals[folder_name])
                self.folder_monitors.start(thread)

                if s3_resync:
                    log.info('Starting an S3 monitor',
                             account_id=self.account_id)
                    s3_thread = S3FolderSyncEngine(self.account_id,
                                                   self.namespace_id,
                                                   folder_name,
                                                   self.email_address,
                                                   self.provider_name,
                                                   self.syncmanager_lock,
                                                   None)
                    self.folder_monitors.start(s3_thread)

            while thread.state != 'poll' and not thread.ready():
                sleep(self.heartbeat)
                self.perform_syncback()

            if thread.ready():
                self._remove_sync_signal(folder_name)
                log.info('Folder sync engine exited',
                         account_id=self.account_id,
                         folder_name=folder_name,
                         error=thread.exception)

    def start_delete_handler(self):
        if self.delete_handler is None:
            self.delete_handler = DeleteHandler(
                account_id=self.account_id,
                namespace_id=self.namespace_id,
                provider_name=self.provider_name,
                uid_accessor=lambda m: m.imapuids)
            self.delete_handler.start()

    def perform_syncback(self):
        """
        Perform syncback for the account.

        Syncback is performed iff all folder syncs are paused, and the previous
        syncback occurred more than syncback_frequency seconds ago.

        The first condition is checked by the call to _can_syncback().
        The second condition is needed because, if there are a large number of
        pending actions during initial sync, the folder syncs could otherwise
        be repeatedly interrupted and put on hold for seconds at a time.

        """
        from inbox.syncback.base import SyncbackHandler

        if not self._can_syncback():
            log.info('Skipping syncback', reason='folder syncs running')
            return

        # Use total_seconds() so intervals longer than a day don't wrap.
        if (self.syncback_timestamp and
                (datetime.utcnow() - self.syncback_timestamp).total_seconds()
                < self.syncback_frequency):
            log.info('Skipping syncback',
                     reason='last syncback < syncback_frequency seconds ago',
                     syncback_frequency=self.syncback_frequency)
            # Reset here so syncs can proceed
            self._signal_syncs()
            return

        if self.syncback_handler is None:
            self.syncback_handler = SyncbackHandler(self.account_id,
                                                    self.namespace_id,
                                                    self.provider_name)
        try:
            interval = ((datetime.utcnow() -
                         self.syncback_timestamp).total_seconds()
                        if self.syncback_timestamp else None)
            log.info('Performing syncback', syncback_interval_in_seconds=interval)
            self.syncback_handler.send_client_changes()
            self.syncback_timestamp = datetime.utcnow()
        except Exception:
            # Log, set self.folder_sync_signals and then re-raise (so the
            # greenlet can be restarted etc.)
            log.error('Critical syncback error', exc_info=True)
            raise
        finally:
            # Reset here so syncs can proceed
            self._signal_syncs()

    def sync(self):
        try:
            self.start_delete_handler()
            self.start_new_folder_sync_engines()
            while True:
                sleep(self.syncback_frequency)
                self.perform_syncback()
                self.start_new_folder_sync_engines()
        except ValidationError as exc:
            log.error(
                'Error authenticating; stopping sync', exc_info=True,
                account_id=self.account_id, logstash_tag='mark_invalid')
            with session_scope(self.namespace_id) as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.mark_invalid()
                account.update_sync_error(str(exc))

    def _add_sync_signal(self, folder_name):
        self.folder_sync_signals[folder_name] = Event()
        self.folder_sync_signals[folder_name].set()

    def _remove_sync_signal(self, folder_name):
        del self.folder_sync_signals[folder_name]

    def _can_syncback(self):
        """
        Determine if syncback can occur.

        If all folder syncs are paused as indicated by the folder_sync_signals,
        returns True. Else, returns False.

        """
        return (not self.folder_sync_signals or
                all(not signal.is_set() for signal in
                    self.folder_sync_signals.values()))

    def _signal_syncs(self):
        """ Indicate that folder syncs can resume. """
        for signal in self.folder_sync_signals.values():
            signal.set()
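The folder_sync_signals machinery above amounts to a gevent Event handshake: judging by _can_syncback() and _signal_syncs(), each folder sync clears its event when it reaches a safe pause point and waits on it, and the monitor performs syncback only once every event is cleared, then sets them all so the syncs resume. A minimal sketch of that handshake, with the syncback reduced to a print:

import gevent
from gevent.event import Event

signals = {name: Event() for name in ('inbox', 'archive')}

def folder_sync(name):
    sig = signals[name]
    sig.clear()  # reached a safe point: mark this folder sync as paused
    sig.wait()   # block until the monitor signals that syncs may resume
    # ... folder sync work would continue here ...

def monitor():
    gevent.sleep(0.01)  # give the folder syncs time to reach a pause point
    if all(not sig.is_set() for sig in signals.values()):
        print('all folder syncs paused: performing syncback')
    for sig in signals.values():
        sig.set()  # let the folder syncs resume

gevent.joinall([gevent.spawn(folder_sync, n) for n in signals] +
               [gevent.spawn(monitor)])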