def test_parallel_folder_syncs(db, folder_name_mapping, default_account,
                               monkeypatch):
    """Two concurrent save_folder_names runs must leave one 'random' tag.

    This is the situation that arises when the CondstoreFolderSyncEngine
    checks for UID changes.
    """
    # Stall the heartbeat cleanup for a second. Per the original author:
    # folder_name_mapping contains a deleted folder, so the first greenlet
    # waits here, the second one overtakes it, and any race condition around
    # tag creation is forced to show up.
    def stalled_remove_folders(w, x, y, z):
        gevent.sleep(1)

    monkeypatch.setattr(
        'inbox.heartbeat.store.HeartbeatStore.remove_folders',
        stalled_remove_folders)

    log = get_logger()
    workers = Group()
    # Each worker is spawned from inside its own session scope.
    for _ in range(2):
        with mailsync_session_scope() as db_session:
            workers.spawn(save_folder_names, log, default_account.id,
                          folder_name_mapping, db_session)
    workers.join()

    with mailsync_session_scope() as db_session:
        account = db_session.query(Account).get(default_account.id)
        tag_query = db_session.query(Tag)
        random_tags = tag_query.filter_by(namespace_id=account.namespace.id,
                                          name='random')
        assert random_tags.count() == 1
def test_name_collision_folders(db, default_account, folder_name_mapping):
    """A user-created 'spam' folder must not share the canonical spam tag.

    Phase one adds an extra folder named 'spam' and expects it to get its own
    'imap/spam' tag; phase two removes that folder and expects only the
    canonical 'spam' tag to remain.
    """
    def tags_named(session, tag_name):
        # Tags in the default account's namespace carrying the given name.
        return session.query(Tag).filter_by(
            namespace_id=default_account.namespace.id, name=tag_name)

    # Phase one: a user-created folder called 'spam' shows up.
    folder_name_mapping["extra"] = ["spam"]
    with mailsync_session_scope() as db_session:
        log = get_logger()
        save_folder_names(log, default_account.id, folder_name_mapping,
                          db_session)

        # Exactly one canonical ('Gmail/Spam') tag ...
        canonical = tags_named(db_session, "spam")
        assert canonical.count() == 1
        assert canonical.first().public_id == "spam"

        # ... and exactly one non-canonical 'imap/spam' tag whose public_id
        # is something other than 'spam'.
        user_created = tags_named(db_session, "imap/spam")
        assert user_created.count() == 1
        assert user_created.first().public_id != "spam"

    # Phase two: the user-created folder goes away again; the canonical tag
    # must survive its deletion.
    folder_name_mapping["extra"] = []
    with mailsync_session_scope() as db_session:
        log = get_logger()
        save_folder_names(log, default_account.id, folder_name_mapping,
                          db_session)

        # The canonical tag is still there.
        canonical = tags_named(db_session, "spam")
        assert canonical.count() == 1
        assert canonical.first().public_id == "spam"

        # The non-canonical tag is gone.
        user_created = tags_named(db_session, "imap/spam")
        assert user_created.count() == 0
Exemple #3
0
    def check_uid_changes(self, crispin_client, download_stack,
                          async_download):
        """
        Poll `self.folder_name` for changes by comparing highestmodseq values.

        Selects the folder on `crispin_client`, then compares the server's
        highestmodseq with the saved one. Returns early (doing nothing else)
        when the server value equals the saved value, and logs a warning and
        returns when it is lower. Otherwise it calls `save_folder_names` with
        the server's current folder names, passes new/updated UIDs to
        `update_metadata` and `highestmodseq_callback`, calls
        `remove_deleted_uids`, updates the UID counts and finally stores the
        new uidvalidity and highestmodseq.

        `download_stack` is iterated here as (uid, metadata) pairs;
        `async_download` is only forwarded to `highestmodseq_callback`.
        """
        crispin_client.select_folder(self.folder_name, uidvalidity_cb)
        new_highestmodseq = crispin_client.selected_highestmodseq
        with mailsync_session_scope() as db_session:
            saved_folder_info = common.get_folder_info(
                self.account_id, db_session, self.folder_name)
            # Ensure that we have an initial highestmodseq value stored before
            # we begin polling for changes.
            if saved_folder_info is None or \
                    saved_folder_info.highestmodseq is None:
                assert (crispin_client.selected_uidvalidity is not None
                        and crispin_client.selected_highestmodseq is
                        not None)
                saved_folder_info = common.update_folder_info(
                    crispin_client.account_id, db_session,
                    self.folder_name,
                    crispin_client.selected_uidvalidity,
                    crispin_client.selected_highestmodseq)
            saved_highestmodseq = saved_folder_info.highestmodseq
            if new_highestmodseq == saved_highestmodseq:
                # Don't need to do anything if the highestmodseq hasn't
                # changed.
                return
            elif new_highestmodseq < saved_highestmodseq:
                # This should really never happen, but if it does, handle it.
                log.warning('got server highestmodseq less than saved '
                            'highestmodseq',
                            new_highestmodseq=new_highestmodseq,
                            saved_highestmodseq=saved_highestmodseq)
                return
            # Only reached when the server's highestmodseq is greater than
            # the saved one; still inside the first session scope.
            save_folder_names(log, self.account_id,
                              crispin_client.folder_names(), db_session)
        # Highestmodseq has changed, update accordingly.
        new_uidvalidity = crispin_client.selected_uidvalidity
        changed_uids = crispin_client.new_and_updated_uids(saved_highestmodseq)
        remote_uids = crispin_client.all_uids()
        with mailsync_session_scope() as db_session:
            local_uids = common.all_uids(self.account_id, db_session,
                                         self.folder_name)
        # UIDs already queued for download are treated like local ones when
        # classifying the changed UIDs.
        stack_uids = {uid for uid, _ in download_stack}
        local_with_pending_uids = local_uids | stack_uids
        new, updated = new_or_updated(changed_uids, local_with_pending_uids)
        if changed_uids:
            log.info("Changed UIDs", message="new: {} updated: {}"
                                             .format(len(new), len(updated)),
                     new_uid_count=len(new), updated_uid_count=len(updated))
            self.update_metadata(crispin_client, updated)
            self.highestmodseq_callback(crispin_client, new, updated,
                                        download_stack, async_download)

        with mailsync_session_scope() as db_session:
            # Only the removal step runs while holding syncmanager_lock.
            with self.syncmanager_lock:
                self.remove_deleted_uids(db_session, local_uids, remote_uids)
            self.update_uid_counts(db_session,
                                   remote_uid_count=len(remote_uids))
            # Persist the values read from the server at the top of this
            # call, so the next poll compares against them.
            common.update_folder_info(self.account_id, db_session,
                                      self.folder_name, new_uidvalidity,
                                      new_highestmodseq)
            db_session.commit()
def test_parallel_folder_syncs(db, folder_name_mapping, monkeypatch):
    """Running save_folder_names twice in parallel creates only one tag.

    The parallel runs reproduce what happens when the
    CondstoreFolderSyncEngine checks for UID changes.
    """
    # Replacement for HeartbeatStore.remove_folders that just sleeps. As the
    # original author explains, folder_name_mapping holds a deleted folder,
    # so the first greenlet is made to wait here while the second overtakes
    # it, forcing any potential race condition around tag creation.
    def sleepy_remove_folders(w, x, y, z):
        gevent.sleep(1)

    patch_target = 'inbox.heartbeat.store.HeartbeatStore.remove_folders'
    monkeypatch.setattr(patch_target, sleepy_remove_folders)

    log = get_logger()
    greenlets = Group()
    # Two workers, each spawned from within a session scope of its own.
    for _ in range(2):
        with mailsync_session_scope() as db_session:
            greenlets.spawn(save_folder_names, log, ACCOUNT_ID,
                            folder_name_mapping, db_session)
    greenlets.join()

    with mailsync_session_scope() as db_session:
        account = db_session.query(Account).get(ACCOUNT_ID)
        all_tags = db_session.query(Tag)
        random_tags = all_tags.filter_by(namespace_id=account.namespace.id,
                                         name='random')
        assert random_tags.count() == 1
Exemple #5
0
    def check_uid_changes(self, crispin_client, download_stack,
                          async_download):
        """
        Check the folder for changes since the stored highestmodseq.

        Selects `self.folder_name`, seeds the stored folder info if there is
        none yet, and returns early when the server's highestmodseq is equal
        to or lower than the stored value. Otherwise new/updated UIDs are
        handed to `update_metadata` and `highestmodseq_callback`,
        `remove_deleted_uids` is called, UID counts are updated and the new
        uidvalidity / highestmodseq are stored.
        """
        crispin_client.select_folder(self.folder_name, uidvalidity_cb)
        server_modseq = crispin_client.selected_highestmodseq
        with mailsync_session_scope() as db_session:
            folder_info = common.get_folder_info(
                self.account_id, db_session, self.folder_name)
            # Make sure an initial highestmodseq is stored before polling.
            if folder_info is None or folder_info.highestmodseq is None:
                assert (crispin_client.selected_uidvalidity is not None
                        and crispin_client.selected_highestmodseq is not None)
                folder_info = common.update_folder_info(
                    crispin_client.account_id,
                    db_session,
                    self.folder_name,
                    crispin_client.selected_uidvalidity,
                    crispin_client.selected_highestmodseq)
            stored_modseq = folder_info.highestmodseq
            if server_modseq == stored_modseq:
                # Unchanged highestmodseq: nothing to do.
                return
            if server_modseq < stored_modseq:
                # Not expected to happen; log it and stop.
                log.warning('got server highestmodseq less than saved '
                            'highestmodseq',
                            new_highestmodseq=server_modseq,
                            saved_highestmodseq=stored_modseq)
                return

        # The server's highestmodseq has moved forward.
        server_uidvalidity = crispin_client.selected_uidvalidity
        changed_uids = crispin_client.new_and_updated_uids(stored_modseq)
        remote_uids = crispin_client.all_uids()
        with mailsync_session_scope() as db_session:
            local_uids = common.all_uids(self.account_id, db_session,
                                         self.folder_id)
        # UIDs already on the download stack count as known.
        known_uids = local_uids | set(download_stack.keys())
        new, updated = new_or_updated(changed_uids, known_uids)
        if changed_uids:
            new_count, updated_count = len(new), len(updated)
            log.info("Changed UIDs",
                     message="new: {} updated: {}".format(new_count,
                                                          updated_count),
                     new_uid_count=new_count,
                     updated_uid_count=updated_count)
            self.update_metadata(crispin_client, updated)
            self.highestmodseq_callback(crispin_client, new, updated,
                                        download_stack, async_download)

        with mailsync_session_scope() as db_session:
            # Only the removal itself is done under syncmanager_lock.
            with self.syncmanager_lock:
                self.remove_deleted_uids(db_session, local_uids, remote_uids)
            self.update_uid_counts(db_session,
                                   remote_uid_count=len(remote_uids))
            common.update_folder_info(self.account_id, db_session,
                                      self.folder_name, server_uidvalidity,
                                      server_modseq)
            db_session.commit()
Exemple #6
0
 def check_uid_changes(self, crispin_client, download_stack,
                       async_download):
     """
     Reconcile the download stack and local UIDs with the remote UID set.

     Under `syncmanager_lock` and one DB session: drops download-stack
     entries whose UID is no longer remote, queues every remote UID that is
     neither local nor already queued, and calls `remove_deleted_uids`.
     When `async_download` is false, the queued UIDs are downloaded here
     and the UID counts updated. Finally, `update_metadata` is called for
     the highest `self.refresh_flags_max` UIDs present both remotely and
     locally.
     """
     remote_uids = set(crispin_client.all_uids())
     with self.syncmanager_lock:
         with mailsync_session_scope() as db_session:
             local_uids = common.all_uids(self.account_id, db_session,
                                          self.folder_name)
             # Download new UIDs.
             # download_stack is iterated as (uid, metadata) pairs.
             stack_uids = {uid for uid, _ in download_stack}
             local_with_pending_uids = local_uids | stack_uids
             # filter out messages that have disappeared on the remote side
             download_stack.discard([item for item in download_stack if
                                     item[0] not in remote_uids])
             # Queue in ascending UID order; no metadata is attached.
             for uid in sorted(remote_uids):
                 if uid not in local_with_pending_uids:
                     download_stack.put(uid, None)
             self.remove_deleted_uids(db_session, local_uids, remote_uids)
     if not async_download:
         self.download_uids(crispin_client, download_stack)
         with mailsync_session_scope() as db_session:
             self.update_uid_counts(
                 db_session,
                 remote_uid_count=len(remote_uids),
                 download_uid_count=download_stack.qsize())
     # Take the tail of the sorted intersection, i.e. the largest UIDs.
     # NOTE(review): if refresh_flags_max were 0 the slice [-0:] would
     # select every UID rather than none -- confirm it is always positive.
     to_refresh = sorted(remote_uids &
                         local_uids)[-self.refresh_flags_max:]
     self.update_metadata(crispin_client, to_refresh)
Exemple #7
0
    def initial_sync_impl(self, crispin_client):
        """
        Run the initial sync of `self.folder_name`.

        Calls `remove_deleted_uids` with the local and remote UIDs, builds a
        download stack from the remote UIDs that are not among the remaining
        local ones, records the UID counts, then downloads the stack while a
        `poll_for_changes` greenlet runs alongside.
        """
        # The try/finally exists so the poller greenlet is killed even when
        # this greenlet is interrupted.
        poller = None
        try:
            assert crispin_client.selected_folder_name == self.folder_name
            remote_uids = crispin_client.all_uids()
            with self.syncmanager_lock, \
                    mailsync_session_scope() as db_session:
                local_uids = common.all_uids(
                    self.account_id, db_session, self.folder_name)
                deleted_uids = self.remove_deleted_uids(
                    db_session, local_uids, remote_uids)

            surviving_uids = set(local_uids) - deleted_uids
            new_uids = set(remote_uids) - surviving_uids
            download_stack = UIDStack(new_uids)

            with mailsync_session_scope() as db_session:
                # download_uid_count is the initial size of the download
                # queue.
                self.update_uid_counts(db_session,
                                       remote_uid_count=len(remote_uids),
                                       download_uid_count=len(new_uids))

            poller = spawn(self.poll_for_changes, download_stack)
            self.download_uids(crispin_client, download_stack)
        finally:
            if poller is not None:
                poller.kill()
Exemple #8
0
    def initial_sync_impl(self, crispin_client):
        """
        Run the initial sync for this folder.

        Asserts that `crispin_client` has `self.folder_name` selected,
        fetches all remote UIDs, calls `remove_deleted_uids` with the local
        and remote UIDs, queues every remote UID that is not in `local_uids`
        (ascending order, each with a `GenericUIDMetadata`), records the UID
        counts, spawns a `poll_for_changes` greenlet and then downloads the
        queued UIDs. The poller is killed on the way out.
        """
        # We wrap the block in a try/finally because the change_poller greenlet
        # needs to be killed when this greenlet is interrupted
        change_poller = None
        try:
            assert crispin_client.selected_folder_name == self.folder_name
            remote_uids = crispin_client.all_uids()
            with self.syncmanager_lock:
                with mailsync_session_scope() as db_session:
                    local_uids = common.all_uids(self.account_id, db_session,
                                                 self.folder_id)
                    self.remove_deleted_uids(db_session, local_uids,
                                             remote_uids)

            # NOTE(review): local_uids is used as fetched, i.e. without
            # subtracting whatever remove_deleted_uids removed; presumably
            # those UIDs are absent from remote_uids anyway -- confirm.
            new_uids = set(remote_uids) - local_uids
            download_stack = UIDStack()
            for uid in sorted(new_uids):
                download_stack.put(uid, GenericUIDMetadata(self.throttled))

            with mailsync_session_scope() as db_session:
                self.update_uid_counts(
                    db_session,
                    remote_uid_count=len(remote_uids),
                    # This is the initial size of our download_queue
                    download_uid_count=len(new_uids))

            # The poller shares download_stack with the download loop below.
            change_poller = spawn(self.poll_for_changes, download_stack)
            bind_context(change_poller, 'changepoller', self.account_id,
                         self.folder_id)
            self.download_uids(crispin_client, download_stack)

        finally:
            # change_poller is still None if we failed before spawning it.
            if change_poller is not None:
                # schedule change_poller to die
                kill(change_poller)
Exemple #9
0
 def check_uid_changes(self, crispin_client, download_stack,
                       async_download):
     """
     Bring the download stack and local UIDs in line with the remote UIDs.

     Entries whose UID vanished remotely are discarded from the stack,
     remote UIDs that are neither local nor queued are queued, and
     `remove_deleted_uids` is called -- all under `syncmanager_lock`. Unless
     `async_download` is set, the stack is downloaded and the UID counts
     are updated. The last `self.refresh_flags_max` UIDs (in sorted order)
     present on both sides are then passed to `update_metadata`.
     """
     remote_uids = set(crispin_client.all_uids())
     with self.syncmanager_lock, mailsync_session_scope() as db_session:
         local_uids = common.all_uids(self.account_id, db_session,
                                      self.folder_name)
         # UIDs that are stored locally or already waiting on the stack.
         queued_uids = set(uid for uid, _ in download_stack)
         known_uids = local_uids | queued_uids
         # Drop queued messages that have disappeared on the remote side.
         vanished = [entry for entry in download_stack
                     if entry[0] not in remote_uids]
         download_stack.discard(vanished)
         # Queue whatever is new, in ascending UID order.
         missing = [uid for uid in sorted(remote_uids)
                    if uid not in known_uids]
         for uid in missing:
             download_stack.put(uid, None)
         self.remove_deleted_uids(db_session, local_uids, remote_uids)
     if not async_download:
         self.download_uids(crispin_client, download_stack)
         with mailsync_session_scope() as db_session:
             self.update_uid_counts(db_session,
                                    remote_uid_count=len(remote_uids),
                                    download_uid_count=download_stack.qsize())
     shared_uids = sorted(remote_uids & local_uids)
     to_refresh = shared_uids[-self.refresh_flags_max:]
     self.update_metadata(crispin_client, to_refresh)
Exemple #10
0
    def initial_sync_impl(self, crispin_client):
        """
        Perform the initial sync of this folder.

        Calls `remove_deleted_uids` with the local and remote UIDs, queues
        the remote UIDs missing from `local_uids` for download, records the
        UID counts, and downloads the queue while a `poll_for_changes`
        greenlet runs alongside.
        """
        # try/finally: the poller greenlet has to be killed when this
        # greenlet is interrupted.
        poller = None
        try:
            assert crispin_client.selected_folder_name == self.folder_name
            remote_uids = crispin_client.all_uids()
            with self.syncmanager_lock, \
                    mailsync_session_scope() as db_session:
                local_uids = common.all_uids(
                    self.account_id, db_session, self.folder_id)
                self.remove_deleted_uids(
                    db_session, local_uids, remote_uids)

            new_uids = set(remote_uids) - local_uids
            download_stack = UIDStack()
            # Queue in ascending UID order.
            for new_uid in sorted(new_uids):
                download_stack.put(new_uid,
                                   GenericUIDMetadata(self.throttled))

            with mailsync_session_scope() as db_session:
                # download_uid_count is the initial size of the download
                # queue.
                self.update_uid_counts(db_session,
                                       remote_uid_count=len(remote_uids),
                                       download_uid_count=len(new_uids))

            poller = spawn(self.poll_for_changes, download_stack)
            bind_context(poller, 'changepoller', self.account_id,
                         self.folder_id)
            self.download_uids(crispin_client, download_stack)
        finally:
            if poller is not None:
                # Schedule the poller to die.
                kill(poller)
Exemple #11
0
    def __fetch_g_metadata(self, crispin_client, uids):
        """
        Return the remote g_metadata for the currently selected folder.

        If a highestmodseq is saved for the folder, the result of
        `__retrieve_saved_g_metadata` is tried first. If that yields None
        (or no highestmodseq was saved), the g_metadata for all remote UIDs
        is fetched from `crispin_client` and written to the cache file. In
        every case the folder info is then updated with the client's
        current uidvalidity and highestmodseq.

        `uids` is only forwarded to `__retrieve_saved_g_metadata`.
        """
        assert self.folder_name == crispin_client.selected_folder_name, \
            "crispin selected folder isn't as expected"
        remote_g_metadata = None

        with mailsync_session_scope() as db_session:
            saved_folder_info = common.get_folder_info(
                self.account_id, db_session, self.folder_name)
            saved_highestmodseq = or_none(saved_folder_info, lambda i:
                                          i.highestmodseq)
        if saved_highestmodseq is not None:
            # A saved highestmodseq exists, so presumably we have run before
            # and saved metadata may be available. (Original note: if
            # there's no cached validity we probably haven't run before.)
            remote_g_metadata = self.__retrieve_saved_g_metadata(
                crispin_client, uids, saved_highestmodseq)

        if remote_g_metadata is None:
            # Nothing usable was retrieved: fetch everything and cache it.
            remote_g_metadata = crispin_client.g_metadata(
                crispin_client.all_uids())
            set_cache(remote_g_metadata_cache_file(self.account_id,
                                                   self.folder_name),
                      remote_g_metadata)
        # Save highestmodseq that corresponds to the saved g_metadata.
        # This runs whether or not the metadata was just re-fetched.
        with mailsync_session_scope() as db_session:
            common.update_folder_info(self.account_id, db_session,
                                      self.folder_name,
                                      crispin_client.selected_uidvalidity,
                                      crispin_client.selected_highestmodseq)
            db_session.commit()

        return remote_g_metadata
Exemple #12
0
    def __fetch_g_metadata(self, crispin_client, uids):
        """
        Return `(remote_g_metadata, update_uid_count)` for the selected folder.

        When a highestmodseq is saved for the folder, both values come from
        `__retrieve_saved_g_metadata`. If no metadata was obtained that way,
        the g_metadata for all remote UIDs is fetched and written to the
        cache file. The folder info is always updated afterwards with the
        client's current uidvalidity and highestmodseq. `update_uid_count`
        stays 0 when no highestmodseq was saved.
        """
        assert self.folder_name == crispin_client.selected_folder_name, \
            "crispin selected folder isn't as expected"
        g_metadata = None
        updated_count = 0

        with mailsync_session_scope() as db_session:
            folder_info = common.get_folder_info(
                self.account_id, db_session, self.folder_name)
            stored_modseq = or_none(folder_info, lambda i: i.highestmodseq)

        if stored_modseq is not None:
            # Original note: if there's no cached validity we probably
            # haven't run before.
            retrieved = self.__retrieve_saved_g_metadata(
                crispin_client, uids, stored_modseq)
            g_metadata, updated_count = retrieved

        if g_metadata is None:
            g_metadata = crispin_client.g_metadata(crispin_client.all_uids())
            cache_path = remote_g_metadata_cache_file(self.account_id,
                                                      self.folder_name)
            set_cache(cache_path, g_metadata)

        # Store the highestmodseq that corresponds to the saved g_metadata.
        with mailsync_session_scope() as db_session:
            common.update_folder_info(
                self.account_id,
                db_session,
                self.folder_name,
                crispin_client.selected_uidvalidity,
                crispin_client.selected_highestmodseq)
            db_session.commit()

        return g_metadata, updated_count
Exemple #13
0
 def check_uid_changes(self, crispin_client, download_stack,
                       async_download):
     """
     Queue new remote UIDs, call `remove_deleted_uids` and refresh metadata.

     Under `syncmanager_lock`, remote UIDs that are neither local nor
     already on `download_stack` are queued, and `remove_deleted_uids` is
     called. Unless `async_download` is set, the stack is downloaded and
     the UID counts are updated. The last `self.refresh_flags_max` UIDs
     (in sorted order) present on both sides go to `update_metadata`, and
     the folder info is updated with the client's uidvalidity and uidnext.
     """
     remote_uids = set(crispin_client.all_uids())
     with self.syncmanager_lock, mailsync_session_scope() as db_session:
         local_uids = common.all_uids(self.account_id, db_session,
                                      self.folder_id)
         # UIDs that are stored locally or already waiting on the stack.
         known_uids = local_uids | set(download_stack.keys())
         # Queue whatever is new, in ascending UID order.
         missing = [uid for uid in sorted(remote_uids)
                    if uid not in known_uids]
         for uid in missing:
             download_stack.put(uid, None)
         self.remove_deleted_uids(db_session, local_uids, remote_uids)
     if not async_download:
         self.download_uids(crispin_client, download_stack)
         with mailsync_session_scope() as db_session:
             self.update_uid_counts(db_session,
                                    remote_uid_count=len(remote_uids),
                                    download_uid_count=len(download_stack))
     shared_uids = sorted(remote_uids & local_uids)
     to_refresh = shared_uids[-self.refresh_flags_max:]
     self.update_metadata(crispin_client, to_refresh)
     with mailsync_session_scope() as db_session:
         # None is passed in the slot between uidvalidity and uidnext.
         common.update_folder_info(
             self.account_id,
             db_session,
             self.folder_name,
             crispin_client.selected_uidvalidity,
             None,
             crispin_client.selected_uidnext)
Exemple #14
0
    def initial_sync_impl(self, crispin_client):
        """
        Run the initial Gmail sync for `self.folder_name`.

        Fetches the remote g_metadata, calls `remove_deleted_uids` with the
        local and remote UIDs, records the UID counts, and then downloads
        the UIDs that are not stored locally. The download strategy depends
        on the folder:

        * the inbox folder: every unknown UID that has flags is queued as a
          `GMessage` and handed to `__download_queued_threads`;
        * a folder in `uid_download_folders(crispin_client)`: the result of
          `__deduplicate_message_download` is queued and handed to
          `download_uids`;
        * anything else raises `MailsyncError`.

        In both download branches a `poll_for_changes` greenlet is spawned
        first and killed on the way out.
        """
        # We wrap the block in a try/finally because the greenlets like
        # change_poller need to be killed when this greenlet is interrupted
        change_poller = None
        try:
            with mailsync_session_scope() as db_session:
                local_uids = common.all_uids(self.account_id, db_session,
                                             self.folder_name)
            remote_uid_count = len(set(crispin_client.all_uids()))
            remote_g_metadata = self.__fetch_g_metadata(
                crispin_client, local_uids)
            # The remote UID list is taken from the g_metadata keys, sorted
            # numerically.
            remote_uids = sorted(remote_g_metadata.keys(), key=int)
            with self.syncmanager_lock:
                with mailsync_session_scope() as db_session:
                    deleted_uids = self.remove_deleted_uids(
                        db_session, local_uids, remote_uids)

                    local_uids = set(local_uids) - deleted_uids
                    unknown_uids = set(remote_uids) - local_uids
                    self.update_uid_counts(
                        db_session, remote_uid_count=remote_uid_count,
                        download_uid_count=len(unknown_uids))

            download_stack = UIDStack()
            if self.folder_name == crispin_client.folder_names()['inbox']:
                # We don't do an initial dedupe for Inbox because we do thread
                # expansion, which means even if we have a given msgid
                # downloaded, we miiight not have the whole thread. This means
                # that restarts cause duplicate work, but hopefully these
                # folders aren't too huge.
                flags = crispin_client.flags(unknown_uids)
                for uid in sorted(unknown_uids):
                    # UIDs for which no flags came back are not queued.
                    if uid in flags:
                        download_stack.put(
                            uid,
                            GMessage(uid, remote_g_metadata[uid],
                                     flags[uid].flags, flags[uid].labels,
                                     throttled=self.throttled))
                change_poller = spawn(self.poll_for_changes, download_stack)
                self.__download_queued_threads(crispin_client,
                                               download_stack)
            elif self.folder_name in uid_download_folders(crispin_client):
                full_download = self.__deduplicate_message_download(
                    crispin_client, remote_g_metadata, unknown_uids)
                # Queued in ascending order with no metadata attached.
                for uid in sorted(full_download):
                    download_stack.put(uid, None)
                change_poller = spawn(self.poll_for_changes, download_stack)
                self.download_uids(crispin_client, download_stack)
            else:
                raise MailsyncError(
                    'Unknown Gmail sync folder: {}'.format(self.folder_name))

            # Complete X-GM-MSGID mapping is no longer needed after initial
            # sync.
            # (Not reached when an exception propagates out of the code
            # above, so the cache file is left in place in that case.)
            rm_cache(remote_g_metadata_cache_file(self.account_id,
                                                  self.folder_name))
        finally:
            # change_poller is still None if we failed before spawning it.
            if change_poller is not None:
                change_poller.kill()
Exemple #15
0
    def _run_impl(self):
        """
        Drive this folder's sync state machine until it reaches 'finish'.

        Creates the connection pool, loads the saved folder status, and
        then loops: the handler registered in `self.state_handlers` for the
        current state is called and its return value becomes the new state.
        Whenever the state changes it is written to the saved folder
        status. The loop returns once the state is 'finish'.

        Raises `MailsyncDone` when loading the state hits an
        `IntegrityError`, when a handler raises `FolderMissingError`, or
        when a handler raises `ValidationError` (after calling
        `mark_invalid` and `update_sync_error` on the account).
        """
        # We defer initializing the pool to here so that we'll retry if there
        # are any errors (remote server 503s or similar) when initializing it.
        self.conn_pool = connection_pool(self.account_id)
        try:
            saved_folder_status = self._load_state()
        except IntegrityError:
            # The state insert failed because the folder ID ForeignKey
            # was no longer valid, ie. the folder for this engine was deleted
            # while we were starting up.
            # Exit the sync and let the monitor sort things out.
            log.info("Folder state loading failed due to IntegrityError",
                     folder_id=self.folder_id,
                     account_id=self.account_id)
            raise MailsyncDone()

        # NOTE: The parent ImapSyncMonitor handler could kill us at any
        # time if it receives a shutdown command. The shutdown command is
        # equivalent to ctrl-c.
        while True:
            old_state = self.state
            try:
                self.state = self.state_handlers[old_state]()
                self.heartbeat_status.publish(state=self.state)
            except UidInvalid:
                # Append a ' uidinvalid' suffix to the current state name;
                # the next iteration looks up the handler under that name.
                self.state = self.state + ' uidinvalid'
                self.heartbeat_status.publish(state=self.state)
            except FolderMissingError:
                # Folder was deleted by monitor while its sync was running.
                # TODO: Monitor should handle shutting down the folder engine.
                log.info('Folder disappeared. Stopping sync.',
                         account_id=self.account_id,
                         folder_name=self.folder_name,
                         folder_id=self.folder_id)
                raise MailsyncDone()
            except ValidationError as exc:
                log.error('Error authenticating; stopping sync',
                          exc_info=True,
                          account_id=self.account_id,
                          folder_id=self.folder_id,
                          logstash_tag='mark_invalid')
                # Record the failure on the account before giving up.
                with mailsync_session_scope() as db_session:
                    account = db_session.query(Account).get(self.account_id)
                    account.mark_invalid()
                    account.update_sync_error(str(exc))
                raise MailsyncDone()

            # State handlers are idempotent, so it's okay if we're
            # killed between the end of the handler and the commit.
            if self.state != old_state:
                # Don't need to re-query, will auto refresh on re-associate.
                with mailsync_session_scope() as db_session:
                    db_session.add(saved_folder_status)
                    saved_folder_status.state = self.state
                    db_session.commit()
            if self.state == 'finish':
                return
Exemple #16
0
    def _run_impl(self):
        """
        Run the folder sync state machine until the state is 'finish'.

        Each iteration calls the handler registered for the current state
        and adopts its return value as the next state; state changes are
        written to the saved folder status. `MailsyncDone` is raised when
        the saved state cannot be loaded (`IntegrityError`), when the folder
        is missing (`FolderMissingError`), or on a `ValidationError` from a
        handler.
        """
        # The pool is created here rather than earlier so that a failure
        # while initializing it (remote server 503s or similar) is retried.
        self.conn_pool = connection_pool(self.account_id)
        try:
            folder_status = self._load_state()
        except IntegrityError:
            # The state insert failed because the folder ID ForeignKey was
            # no longer valid, i.e. the folder for this engine was deleted
            # during startup. Exit and let the monitor sort things out.
            log.info("Folder state loading failed due to IntegrityError",
                     folder_id=self.folder_id,
                     account_id=self.account_id)
            raise MailsyncDone()

        # NOTE: the parent ImapSyncMonitor handler may kill us at any time
        # on a shutdown command, which is equivalent to ctrl-c.
        while True:
            previous_state = self.state
            try:
                handler = self.state_handlers[previous_state]
                self.state = handler()
                self.heartbeat_status.publish(state=self.state)
            except UidInvalid:
                self.state = self.state + ' uidinvalid'
                self.heartbeat_status.publish(state=self.state)
            except FolderMissingError:
                # The monitor deleted the folder while its sync was running.
                # TODO: Monitor should handle shutting down the folder engine.
                log.info('Folder disappeared. Stopping sync.',
                         account_id=self.account_id,
                         folder_name=self.folder_name,
                         folder_id=self.folder_id)
                raise MailsyncDone()
            except ValidationError as exc:
                log.error('Error authenticating; stopping sync',
                          exc_info=True,
                          account_id=self.account_id,
                          folder_id=self.folder_id,
                          logstash_tag='mark_invalid')
                with mailsync_session_scope() as db_session:
                    account = db_session.query(Account).get(self.account_id)
                    account.mark_invalid()
                    account.update_sync_error(str(exc))
                raise MailsyncDone()

            # Handlers are idempotent, so being killed between the end of a
            # handler and the commit below is harmless.
            if self.state != previous_state:
                # No re-query needed: the row refreshes when re-associated.
                with mailsync_session_scope() as db_session:
                    db_session.add(folder_status)
                    folder_status.state = self.state
                    db_session.commit()
            if self.state == 'finish':
                return
Exemple #17
0
    def highestmodseq_update(self, crispin_client, last_highestmodseq):
        """Reconcile local UID state with the remote folder after a
        HIGHESTMODSEQ change.

        crispin_client -- client whose selected folder supplies
            `selected_highestmodseq`, `selected_uidvalidity` and the UID
            listings used below.
        last_highestmodseq -- handed to `new_and_updated_uids` to obtain the
            UIDs that are new or changed.

        Always finishes by recording the client's current uidvalidity and
        highestmodseq through `common.update_folder_info` and committing.
        """
        # Capture both values up front; they are what gets persisted at the
        # end of this method.
        new_highestmodseq = crispin_client.selected_highestmodseq
        new_uidvalidity = crispin_client.selected_uidvalidity
        log.info('starting highestmodseq update',
                 current_highestmodseq=new_highestmodseq)
        changed_uids = crispin_client.new_and_updated_uids(last_highestmodseq)
        remote_uids = crispin_client.all_uids()

        # Stays None when nothing changed, so the final section knows it
        # still has to load the local UIDs itself.
        local_uids = None
        if changed_uids:
            with mailsync_session_scope() as db_session:
                local_uids = common.all_uids(self.account_id, db_session,
                                             self.folder_name)

            # Split the changed UIDs into new ones and updated ones.
            new, updated = new_or_updated(changed_uids, local_uids)
            log.info(new_uid_count=len(new), updated_uid_count=len(updated))

            # Add the new UIDs to the local set before the deletion pass.
            local_uids.update(new)
            with self.syncmanager_lock:
                log.debug("highestmodseq_update acquired syncmanager_lock")
                with mailsync_session_scope() as db_session:
                    deleted_uids = self.remove_deleted_uids(
                        db_session, local_uids, remote_uids)

            local_uids = local_uids - deleted_uids
            self.update_metadata(crispin_client, updated)

            with mailsync_session_scope() as db_session:
                self.update_uid_counts(
                    db_session,
                    remote_uid_count=len(remote_uids),
                    download_uid_count=len(new),
                    update_uid_count=len(updated),
                    delete_uid_count=len(deleted_uids))

            self.highestmodseq_callback(crispin_client, new, updated)
        else:
            log.info("No new or updated messages")

        # NOTE(review): here the session is opened before the lock is taken,
        # the reverse of the nesting used in the branch above -- confirm this
        # ordering is intentional.
        with mailsync_session_scope() as db_session:
            with self.syncmanager_lock:
                log.debug("highestmodseq_update acquired syncmanager_lock")
                if local_uids is None:
                    local_uids = common.all_uids(
                        self.account_id, db_session, self.folder_name)
                # NOTE(review): when changed_uids was non-empty this is a
                # second deletion pass over an already-pruned local_uids, and
                # the counts update below passes only the remote and delete
                # counts -- verify this does not clobber the counts written
                # above.
                deleted_uids = self.remove_deleted_uids(
                    db_session, local_uids, remote_uids)
            self.update_uid_counts(db_session,
                                   remote_uid_count=len(remote_uids),
                                   delete_uid_count=len(deleted_uids))
            # Persist the uidvalidity/highestmodseq captured at the top.
            common.update_folder_info(self.account_id, db_session,
                                      self.folder_name, new_uidvalidity,
                                      new_highestmodseq)
            db_session.commit()
Exemple #18
0
    def initial_sync_impl(self, crispin_client):
        """Run the initial sync of this folder.

        Removes locally stored UIDs that are gone remotely, records UID
        counts, then queues the UIDs not yet known locally and downloads
        them (as threads for the all-mail folder, as individual UIDs
        otherwise). A change-poller greenlet runs alongside the download.
        """
        # The poller is spawned part-way through; the try/finally makes sure
        # it gets killed if this greenlet is interrupted.
        change_poller = None
        try:
            with mailsync_session_scope() as db_session:
                local_uids = common.all_uids(self.account_id, db_session,
                                             self.folder_id)
            remote_uids = sorted(crispin_client.all_uids(), key=int)
            remote_uid_count = len(remote_uids)
            with self.syncmanager_lock:
                with mailsync_session_scope() as db_session:
                    self.remove_deleted_uids(db_session, local_uids,
                                             remote_uids)
                    unknown_uids = set(remote_uids) - local_uids
                    self.update_uid_counts(
                        db_session,
                        remote_uid_count=remote_uid_count,
                        download_uid_count=len(unknown_uids))

            g_metadata = crispin_client.g_metadata(unknown_uids)
            stack = UIDStack()
            change_poller = spawn(self.poll_for_changes, stack)
            bind_context(change_poller, 'changepoller', self.account_id,
                         self.folder_id)
            if self.is_all_mail(crispin_client):
                # Push non-inbox UIDs first and inbox UIDs last, ascending
                # within each group, so that inbox messages and higher (more
                # recent) UIDs are the first to be downloaded.
                inbox_uids = crispin_client.search_uids(['X-GM-LABELS inbox'])
                # Membership is tested against a set for performance.
                inbox_uid_set = set(inbox_uids)
                non_inbox_uids = [uid for uid in sorted(remote_uids)
                                  if uid not in inbox_uid_set]
                for uid in non_inbox_uids + sorted(inbox_uids):
                    if uid not in g_metadata:
                        continue
                    entry = g_metadata[uid]
                    stack.put(uid, GMetadata(entry.msgid, entry.thrid,
                                             self.throttled))
                self.__download_queued_threads(crispin_client, stack)
            else:
                full_download = self.__deduplicate_message_download(
                    crispin_client, g_metadata, unknown_uids)
                for uid in sorted(full_download):
                    stack.put(uid, None)
                self.download_uids(crispin_client, stack)
        finally:
            if change_poller is not None:
                # schedule change_poller to die
                kill(change_poller)
Exemple #19
0
    def poll_impl(self, crispin_client):
        """Run one poll cycle for this folder.

        Compares the folder's current HIGHESTMODSEQ with the saved one and,
        if it has advanced, refreshes folder names and runs
        `highestmodseq_update`. Then waits: via IMAP IDLE for folders listed
        in IDLE_FOLDERS, via a plain sleep for all others.
        """
        log.bind(state='poll')

        with mailsync_session_scope() as db_session:
            folder_info = common.get_folder_info(
                crispin_client.account_id, db_session, self.folder_name)
            saved_highestmodseq = folder_info.highestmodseq

        # Selecting the folder starts a session (needed for IDLE below
        # anyway) and resets the folder name cache, which is what lets us
        # notice folder/label additions and deletions.
        status = crispin_client.select_folder(
            self.folder_name, uidvalidity_cb(crispin_client.account_id))
        current_highestmodseq = status['HIGHESTMODSEQ']

        log.debug(current_modseq=current_highestmodseq,
                  saved_modseq=saved_highestmodseq)

        if current_highestmodseq > saved_highestmodseq:
            with mailsync_session_scope() as db_session:
                acc = db_session.query(ImapAccount).get(self.account_id)
                save_folder_names(log, acc, crispin_client.folder_names(),
                                  db_session)
            self.highestmodseq_update(crispin_client, saved_highestmodseq)

        # IDLE is only useful on folders that receive new messages; idling
        # on `All Mail` would not report messages archived from the Inbox.
        if self.folder_name.lower() not in IDLE_FOLDERS:
            log.info('IDLE sleeping', seconds=self.poll_frequency)
            sleep(self.poll_frequency)
            return

        crispin_client.select_folder(
            self.folder_name, uidvalidity_cb(crispin_client.account_id))
        # IDLE does not report flag changes, so keep the wait short or
        # things like messages being marked as read would go unnoticed.
        idle_frequency = 30

        log.info('idling', timeout=idle_frequency)
        crispin_client.conn.idle()
        # The IDLE response is deliberately ignored: it reports sequence IDs
        # rather than UIDs, which makes it awkward to use.
        crispin_client.conn.idle_check(timeout=idle_frequency)
        crispin_client.conn.idle_done()
        log.info('IDLE triggered poll')
Exemple #20
0
    def initial_sync_impl(self, crispin_client):
        """Run the initial sync of this Gmail folder.

        Prunes local UIDs that no longer exist remotely, records UID counts,
        and downloads the remaining unknown UIDs either one by one or as
        expanded threads, depending on which folder group `self.folder_name`
        belongs to. Raises MailsyncError for a folder in neither group.
        """
        # The poller is spawned part-way through; the try/finally makes sure
        # it gets killed if this greenlet is interrupted.
        change_poller = None
        try:
            with mailsync_session_scope() as db_session:
                local_uids = common.all_uids(self.account_id, db_session,
                                             self.folder_name)
            remote_uids = sorted(crispin_client.all_uids(), key=int)
            remote_uid_count = len(remote_uids)
            with self.syncmanager_lock:
                with mailsync_session_scope() as db_session:
                    deleted_uids = self.remove_deleted_uids(
                        db_session, local_uids, remote_uids)

                    local_uids = set(local_uids) - deleted_uids
                    unknown_uids = set(remote_uids) - local_uids
                    self.update_uid_counts(
                        db_session,
                        remote_uid_count=remote_uid_count,
                        download_uid_count=len(unknown_uids))

            g_metadata = crispin_client.g_metadata(unknown_uids)
            stack = UIDStack()
            change_poller = spawn(self.poll_for_changes, stack)
            if self.folder_name in uid_download_folders(crispin_client):
                full_download = self.__deduplicate_message_download(
                    crispin_client, g_metadata, unknown_uids)
                for uid in sorted(full_download):
                    stack.put(uid, None)
                self.download_uids(crispin_client, stack)
            elif self.folder_name in thread_expand_folders(crispin_client):
                flags = crispin_client.flags(unknown_uids)
                for uid in sorted(unknown_uids):
                    # Only UIDs for which flags were returned are queued.
                    if uid not in flags:
                        continue
                    uid_flags = flags[uid]
                    stack.put(uid, GMessage(uid,
                                            g_metadata[uid],
                                            uid_flags.flags,
                                            uid_flags.labels,
                                            throttled=self.throttled))
                # Threads are always downloaded via the 'All Mail' folder.
                all_mail_folder = crispin_client.folder_names()['all']
                crispin_client.select_folder(all_mail_folder, uidvalidity_cb)
                self.__download_queued_threads(crispin_client, stack)
            else:
                raise MailsyncError('Unknown Gmail sync folder: {}'.format(
                    self.folder_name))
        finally:
            if change_poller is not None:
                change_poller.kill()
Exemple #21
0
    def initial_sync_impl(self, crispin_client):
        """Perform the first full sync pass over this folder.

        Local UIDs missing from the remote are removed, UID counts are
        recorded, and every remote UID unknown locally is queued and
        downloaded: thread-wise for the all-mail folder, UID-wise for any
        other folder. A change poller is spawned for the duration.
        """
        # Keep a handle on the poller so the finally clause can kill it when
        # this greenlet is interrupted.
        change_poller = None
        try:
            with mailsync_session_scope() as db_session:
                local_uids = common.all_uids(self.account_id, db_session,
                                             self.folder_id)
            remote_uids = sorted(crispin_client.all_uids(), key=int)
            remote_uid_count = len(remote_uids)
            with self.syncmanager_lock:
                with mailsync_session_scope() as db_session:
                    self.remove_deleted_uids(db_session, local_uids,
                                             remote_uids)
                    unknown_uids = set(remote_uids) - local_uids
                    self.update_uid_counts(
                        db_session, remote_uid_count=remote_uid_count,
                        download_uid_count=len(unknown_uids))

            remote_g_metadata = crispin_client.g_metadata(unknown_uids)
            download_stack = UIDStack()
            change_poller = spawn(self.poll_for_changes, download_stack)
            bind_context(change_poller, 'changepoller', self.account_id,
                         self.folder_id)
            if self.is_all_mail(crispin_client):
                # Queue order: everything outside the inbox first, inbox
                # UIDs last, each group ascending. The intent is that inbox
                # messages and higher (more recent) UIDs are fetched first.
                inbox_uids = crispin_client.search_uids(['X-GM-LABELS inbox'])
                # A set keeps the membership test below cheap.
                inbox_lookup = set(inbox_uids)
                push_order = []
                for uid in sorted(remote_uids):
                    if uid not in inbox_lookup:
                        push_order.append(uid)
                push_order.extend(sorted(inbox_uids))
                for uid in push_order:
                    if uid in remote_g_metadata:
                        uid_metadata = remote_g_metadata[uid]
                        download_stack.put(
                            uid,
                            GMetadata(uid_metadata.msgid, uid_metadata.thrid,
                                      self.throttled))
                self.__download_queued_threads(crispin_client, download_stack)
            else:
                full_download = self.__deduplicate_message_download(
                    crispin_client, remote_g_metadata, unknown_uids)
                for uid in sorted(full_download):
                    download_stack.put(uid, None)
                self.download_uids(crispin_client, download_stack)
        finally:
            if change_poller is not None:
                # schedule change_poller to die
                kill(change_poller)
Exemple #22
0
    def initial_sync_impl(self, crispin_client):
        """Perform the first full sync pass over this Gmail folder.

        After pruning deleted UIDs and recording counts, unknown UIDs are
        downloaded individually when the folder is one of the
        `uid_download_folders`, or as threads when it is one of the
        `thread_expand_folders`. Any other folder raises MailsyncError.
        """
        # Keep a handle on the poller so the finally clause can kill it when
        # this greenlet is interrupted.
        change_poller = None
        try:
            with mailsync_session_scope() as db_session:
                local_uids = common.all_uids(self.account_id, db_session,
                                             self.folder_name)
            remote_uids = sorted(crispin_client.all_uids(), key=int)
            remote_uid_count = len(remote_uids)
            with self.syncmanager_lock:
                with mailsync_session_scope() as db_session:
                    deleted_uids = self.remove_deleted_uids(
                        db_session, local_uids, remote_uids)

                    local_uids = set(local_uids) - deleted_uids
                    unknown_uids = set(remote_uids) - local_uids
                    self.update_uid_counts(
                        db_session, remote_uid_count=remote_uid_count,
                        download_uid_count=len(unknown_uids))

            remote_g_metadata = crispin_client.g_metadata(unknown_uids)
            download_stack = UIDStack()
            change_poller = spawn(self.poll_for_changes, download_stack)
            if self.folder_name in uid_download_folders(crispin_client):
                full_download = self.__deduplicate_message_download(
                    crispin_client, remote_g_metadata, unknown_uids)
                for uid in sorted(full_download):
                    download_stack.put(uid, None)
                self.download_uids(crispin_client, download_stack)
            elif self.folder_name in thread_expand_folders(crispin_client):
                flags = crispin_client.flags(unknown_uids)
                # UIDs without a flags entry are left out of the queue.
                flagged_uids = [uid for uid in sorted(unknown_uids)
                                if uid in flags]
                for uid in flagged_uids:
                    gmessage = GMessage(uid, remote_g_metadata[uid],
                                        flags[uid].flags,
                                        flags[uid].labels,
                                        throttled=self.throttled)
                    download_stack.put(uid, gmessage)
                # Thread downloads always go through the 'All Mail' folder.
                crispin_client.select_folder(
                    crispin_client.folder_names()['all'], uidvalidity_cb)
                self.__download_queued_threads(crispin_client, download_stack)
            else:
                raise MailsyncError(
                    'Unknown Gmail sync folder: {}'.format(self.folder_name))
        finally:
            if change_poller is not None:
                change_poller.kill()
Exemple #23
0
    def poll_impl(self, crispin_client):
        """Run one poll cycle: prune, download, refresh flags, then sleep.

        Local UIDs that vanished remotely are removed, remote UIDs missing
        locally are downloaded, and metadata is refreshed for the tail of
        the sorted already-present UIDs before sleeping for
        `self.poll_frequency`.
        """
        crispin_client.select_folder(self.folder_name,
                                     uidvalidity_cb(self.account_id))

        remote_uids = set(crispin_client.all_uids())
        with mailsync_session_scope() as db_session:
            local_uids = common.all_uids(self.account_id, db_session,
                                         self.folder_name)
            removed_uids = self.remove_deleted_uids(db_session, local_uids,
                                                    remote_uids)

            local_uids -= removed_uids
            log.info("Removed {} deleted UIDs from {}".format(
                len(removed_uids), self.folder_name))
            uids_to_download = remote_uids - local_uids

            self.update_uid_counts(db_session,
                                   remote_uid_count=len(remote_uids),
                                   download_uid_count=len(uids_to_download),
                                   delete_uid_count=len(removed_uids))

        log.info("UIDs to download: {}".format(uids_to_download))
        if uids_to_download:
            pending_stack = uid_list_to_stack(uids_to_download)
            self.download_uids(crispin_client, pending_stack)

        # Only UIDs that were already present locally need a flag refresh;
        # keep the highest `refresh_flags_max` of them.
        # NOTE(review): a refresh_flags_max of 0 makes the slice `[-0:]`,
        # which selects every UID rather than none -- confirm 0 is never
        # configured.
        already_present = sorted(remote_uids - uids_to_download)
        uids_to_refresh = already_present[-self.refresh_flags_max:]
        log.info('UIDs to refresh: ', uids=uids_to_refresh)
        if uids_to_refresh:
            self.update_metadata(crispin_client, uids_to_refresh)

        sleep(self.poll_frequency)
Exemple #24
0
    def prepare_sync(self):
        """
        Fetch the remote folder list, save it when it differs from the one
        saved previously, and return a list of (folder_name, folder_id)
        tuples for the folders to sync, in sync order.

        Raises MailsyncError if a folder to sync has no Folder row.
        """
        with mailsync_session_scope() as db_session:
            with connection_pool(self.account_id).get() as crispin_client:
                # Always ask the remote for its current folder list, but
                # only write it out when it actually changed.
                remote_folders = crispin_client.folders()
                if self.saved_remote_folders != remote_folders:
                    self.save_folder_names(db_session, remote_folders)
                    self.saved_remote_folders = remote_folders
                # The folders we should be syncing
                sync_folders = crispin_client.sync_folders()

            sync_folder_names_ids = []
            for folder_name in sync_folders:
                folder_id_query = db_session.query(Folder.id).filter(
                    Folder.name == folder_name,
                    Folder.account_id == self.account_id)
                try:
                    id_, = folder_id_query.one()
                except NoResultFound:
                    log.error('Missing Folder object when starting sync',
                              folder_name=folder_name)
                    raise MailsyncError("Missing Folder '{}' on account {}"
                                        .format(folder_name, self.account_id))
                sync_folder_names_ids.append((folder_name, id_))
            return sync_folder_names_ids
Exemple #25
0
    def prepare_sync(self):
        """Save the remote folder names and resolve the folders to sync.

        Passes the backend's folder names to `save_folder_names`, then
        returns a list of (folder_name, folder_id) tuples, one per folder to
        sync, in sync order. Raises MailsyncError if a folder to sync has no
        Folder row.
        """
        with mailsync_session_scope() as db_session:
            with _pool(self.account_id).get() as crispin_client:
                sync_folders = crispin_client.sync_folders()
                save_folder_names(log, self.account_id,
                                  crispin_client.folder_names(), db_session)

            names_and_ids = []
            for folder_name in sync_folders:
                id_lookup = db_session.query(Folder.id).filter(
                    Folder.name == folder_name,
                    Folder.account_id == self.account_id)
                try:
                    id_, = id_lookup.one()
                except NoResultFound:
                    log.error("Missing Folder object when starting sync",
                              folder_name=folder_name)
                    raise MailsyncError(
                        "Missing Folder '{}' on account {}".format(
                            folder_name, self.account_id))
                names_and_ids.append((folder_name, id_))
            return names_and_ids
Exemple #26
0
    def __imap_flag_change_poller(self):
        """
        Periodically update message flags for those servers
        who don't support CONDSTORE.
        Runs until killed. (Intended to be run in a greenlet)

        Each cycle refreshes metadata for the highest `refresh_flags_max`
        UIDs present both remotely and locally, records how many were
        refreshed, then sleeps for `poll_frequency`.
        """
        log.info("Spinning up new flags-refresher for ",
                 folder_name=self.folder_name)
        with self.conn_pool.get() as crispin_client:
            # select_folder never receives a session, so none is opened
            # around it.
            crispin_client.select_folder(
                self.folder_name,
                uidvalidity_cb(crispin_client.account_id))
            while True:
                remote_uids = set(crispin_client.all_uids())
                # Open a fresh session for every read. Previously this query
                # reused a session whose scope had already exited.
                with mailsync_session_scope() as db_session:
                    local_uids = common.all_uids(self.account_id, db_session,
                                                 self.folder_name)
                # Sorting turns the intersection into an ascending list, so
                # the slice keeps the highest UIDs.
                to_refresh = sorted(remote_uids
                                    & local_uids)[-self.refresh_flags_max:]

                self.update_metadata(crispin_client, to_refresh)
                with session_scope(ignore_soft_deletes=True) as db_session:
                    self.update_uid_counts(db_session,
                                           update_uid_count=len(to_refresh))

                sleep(self.poll_frequency)
Exemple #27
0
def add_new_imapuids(crispin_client, remote_g_metadata, syncmanager_lock,
                     uids):
    """
    Add ImapUid entries only for (already-downloaded) messages.

    If a message has already been downloaded via another folder, we only need
    to add `ImapUid` accounting for the current folder. `Message` objects
    etc. have already been created.

    crispin_client -- client supplying the account id, the selected folder
        name and the flags for `uids`.
    remote_g_metadata -- mapping of uid to an object with a `msgid`
        attribute.
    syncmanager_lock -- lock held for the whole database section.
    uids -- UIDs to add; those that already have an ImapUid row in the
        selected folder are skipped.

    """
    flags = crispin_client.flags(uids)

    with syncmanager_lock:
        with mailsync_session_scope() as db_session:
            # Since we prioritize download for messages in certain threads, we
            # may already have ImapUid entries despite calling this method.
            local_folder_uids = {uid for uid, in
                                 db_session.query(ImapUid.msg_uid).join(Folder)
                                 .filter(
                                     ImapUid.account_id ==
                                     crispin_client.account_id,
                                     Folder.name ==
                                     crispin_client.selected_folder_name,
                                     ImapUid.msg_uid.in_(uids))}
            uids = [uid for uid in uids if uid not in local_folder_uids]

            if not uids:
                return

            acc = db_session.query(GmailAccount).get(
                crispin_client.account_id)

            # collate message objects to relate the new imapuids to
            # A set gives constant-time membership tests instead of scanning
            # the `uids` list once per metadata entry.
            pending_uids = set(uids)
            imapuid_for = {metadata.msgid: uid
                           for uid, metadata in remote_g_metadata.items()
                           if uid in pending_uids}
            imapuid_g_msgids = [remote_g_metadata[uid].msgid for uid in
                                uids]
            message_for = {imapuid_for[m.g_msgid]: m for m in
                           db_session.query(Message).join(ImapThread)
                           .filter(
                               Message.g_msgid.in_(imapuid_g_msgids),
                               ImapThread.namespace_id ==
                               acc.namespace.id)}

            # Stop Folder.find_or_create()'s query from triggering a flush.
            with db_session.no_autoflush:
                new_imapuids = [ImapUid(
                    account=acc,
                    folder=Folder.find_or_create(
                        db_session, acc,
                        crispin_client.selected_folder_name),
                    msg_uid=uid, message=message_for[uid]) for uid in uids
                    if uid in message_for]
                for item in new_imapuids:
                    # skip uids which have disappeared in the meantime
                    if item.msg_uid in flags:
                        item.update_flags_and_labels(
                            flags[item.msg_uid].flags,
                            flags[item.msg_uid].labels)
            db_session.add_all(new_imapuids)
            db_session.commit()
def test_sync_folder_deletes(db, default_account, folder_name_mapping):
    """Check that dropping a folder also drops its IMAP bookkeeping rows.

    Folder names are saved once and an ImapFolderInfo / ImapFolderSyncStatus
    pair is added for every folder (7 of each are expected). The 'extra'
    entry of the mapping is then replaced by 'Jobslist' and the names are
    saved again; six folders and six rows of each kind must remain.
    """
    with mailsync_session_scope() as db_session:

        def rows_for_account(model):
            # Number of `model` rows owned by the account under test.
            return db_session.query(model).filter_by(
                account_id=default_account.id).count()

        log = get_logger()
        save_folder_names(log, default_account.id, folder_name_mapping,
                          db_session)
        account_folders = db_session.query(Folder).filter_by(
            account_id=default_account.id)
        for account_folder in account_folders:
            add_imap_status_info_rows(account_folder.id, default_account.id,
                                      db_session)
        db_session.commit()
        assert rows_for_account(ImapFolderInfo) == 7
        assert rows_for_account(ImapFolderSyncStatus) == 7

        # Swap the 'extra' folders for a single new one and save again.
        folder_name_mapping['extra'] = ['Jobslist']
        save_folder_names(log, default_account.id, folder_name_mapping,
                          db_session)
        name_rows = db_session.query(Folder.name).filter(
            Folder.account_id == default_account.id)
        saved_folder_names = {row[0] for row in name_rows}
        assert saved_folder_names == {
            'Inbox', '[Gmail]/Spam', '[Gmail]/All Mail', '[Gmail]/Sent Mail',
            '[Gmail]/Drafts', 'Jobslist'
        }
        assert rows_for_account(ImapFolderInfo) == 6
        assert rows_for_account(ImapFolderSyncStatus) == 6
Exemple #29
0
 def _run_impl(self):
     """Run state handlers in a loop until the state becomes 'finish'.

     Each pass calls the handler registered for the current state and
     stores its return value as the new state. A UidInvalid raised by the
     handler appends ' uidinvalid' to the current state instead. Whenever
     the state changed, it is written to the saved folder status row.
     """
     # Soft deletes are deliberately NOT ignored in the mail sync: handling
     # them gets complicated, e.g. when backends reuse imapids, and ImapUid
     # objects are the only objects the sync backends delete anyway.
     saved_folder_status = self._load_state()
     # NOTE: the parent ImapSyncMonitor handler may kill us at any moment
     # on a shutdown command, which is equivalent to ctrl-c.
     while True:
         previous_state = self.state
         try:
             handler = self.state_handlers[previous_state]
             self.state = handler()
         except UidInvalid:
             self.state += ' uidinvalid'
         # Handlers are idempotent, so being killed between the end of the
         # handler and the commit below is harmless.
         if self.state != previous_state:
             # No re-query needed; the row refreshes when re-associated.
             with mailsync_session_scope() as db_session:
                 db_session.add(saved_folder_status)
                 saved_folder_status.state = self.state
                 db_session.commit()
         if self.state == 'finish':
             break
    def prepare_sync(self):
        """Save the remote folder listing and resolve the folders to sync.

        Asks the IMAP client for the folders to sync and for a force-resynced
        listing of folder names, hands that listing to save_folder_names(),
        then looks up the Folder id for each folder to sync.

        Returns a list of (folder_name, folder_id) tuples, in the order
        reported by crispin_client.sync_folders(). Raises MailsyncError when
        a folder to sync has no Folder row for this account.
        """
        with mailsync_session_scope() as db_session:
            with connection_pool(self.account_id).get() as crispin_client:
                # Folders we are expected to sync.
                sync_folders = crispin_client.sync_folders()
                # Fresh folder listing straight from the remote.
                remote_folders = crispin_client.folder_names(force_resync=True)
                save_folder_names(log, self.account_id,
                                  remote_folders, db_session)

            names_and_ids = []
            for folder_name in sync_folders:
                id_query = db_session.query(Folder.id).filter(
                    Folder.name == folder_name,
                    Folder.account_id == self.account_id)
                try:
                    folder_id, = id_query.one()
                except NoResultFound:
                    log.error("Missing Folder object when starting sync",
                              folder_name=folder_name)
                    raise MailsyncError("Missing Folder '{}' on account {}"
                                        .format(folder_name, self.account_id))
                names_and_ids.append((folder_name, folder_id))
            return names_and_ids
Exemple #31
0
    def poll_impl(self, crispin_client):
        """Run one poll pass over the folder, then sleep.

        Selects the folder, compares remote and local UIDs, removes local
        UIDs that were deleted, records the UID counts, downloads UIDs that
        are missing locally and refreshes metadata for up to
        `refresh_flags_max` of the highest remaining remote UIDs. Ends by
        sleeping for `poll_frequency`.
        """
        crispin_client.select_folder(self.folder_name,
                                     uidvalidity_cb(self.account_id))

        remote_uids = set(crispin_client.all_uids())
        with mailsync_session_scope() as db_session:
            local_uids = common.all_uids(
                self.account_id, db_session, self.folder_name)
            deleted_uids = self.remove_deleted_uids(
                db_session, local_uids, remote_uids)

            # In-place removal, so the set returned above is what shrinks.
            local_uids -= deleted_uids
            removed_count = len(deleted_uids)
            log.info("Removed {} deleted UIDs from {}".format(
                removed_count, self.folder_name))
            uids_to_download = remote_uids.difference(local_uids)

            self.update_uid_counts(db_session,
                                   remote_uid_count=len(remote_uids),
                                   download_uid_count=len(uids_to_download),
                                   delete_uid_count=removed_count)

        log.info("UIDs to download: {}".format(uids_to_download))
        if uids_to_download:
            download_stack = uid_list_to_stack(uids_to_download)
            self.download_uids(crispin_client, download_stack)

        # Only the tail of the sorted list (the highest UIDs) is refreshed.
        already_present = sorted(remote_uids - uids_to_download)
        uids_to_refresh = already_present[-self.refresh_flags_max:]
        log.info('UIDs to refresh: ', uids=uids_to_refresh)
        if uids_to_refresh:
            self.update_metadata(crispin_client, uids_to_refresh)

        sleep(self.poll_frequency)
Exemple #32
0
    def __imap_flag_change_poller(self):
        """
        Periodically update message flags for those servers
        who don't support CONDSTORE.
        Runs until killed. (Intended to be run in a greenlet)

        Each iteration lists the remote UIDs, intersects them with the local
        UIDs, refreshes metadata for up to `refresh_flags_max` of the highest
        UIDs in that intersection, records the count and then sleeps for
        `poll_frequency`.
        """
        log.info("Spinning up new flags-refresher for ",
                 folder_name=self.folder_name)
        with self.conn_pool.get() as crispin_client:
            # Selecting the folder does not use a database session here, so
            # no session scope is opened around it.
            crispin_client.select_folder(self.folder_name,
                                         uidvalidity_cb(
                                             crispin_client.account_id))
            while True:
                remote_uids = set(crispin_client.all_uids())
                # Open a fresh, properly scoped session for every iteration.
                # Previously the query reused a session variable whose
                # `with` block had already exited.
                with mailsync_session_scope() as db_session:
                    local_uids = common.all_uids(self.account_id, db_session,
                                                 self.folder_name)
                # STOPSHIP(emfree): sorted does nothing here
                # NOTE(review): the sort does determine which UIDs survive
                # the slice below (the highest ones) -- confirm intent.
                to_refresh = sorted(remote_uids &
                                    local_uids)[-self.refresh_flags_max:]

                self.update_metadata(crispin_client, to_refresh)
                with session_scope(ignore_soft_deletes=True) as db_session:
                    self.update_uid_counts(db_session,
                                           update_uid_count=len(to_refresh))

                sleep(self.poll_frequency)
Exemple #33
0
    def __deduplicate_message_download(self, crispin_client, remote_g_metadata,
                                       uids):
        """
        Deduplicate message download using X-GM-MSGID.

        UIDs (sorted numerically) are partitioned on whether their
        X-GM-MSGID is already known locally. For the `imapuid_only` group,
        add_new_imapuids() is called unless the selected folder is the
        'all' folder.

        Returns
        -------
        list
            Deduplicated UIDs (the `full_download` half of the partition).

        """
        def known_locally(uid):
            # True when the remote metadata maps this UID to a stored msgid.
            return (uid in remote_g_metadata and
                    remote_g_metadata[uid].msgid in local_g_msgids)

        with mailsync_session_scope() as db_session:
            candidate_msgids = {remote_g_metadata[uid].msgid
                                for uid in uids if uid in remote_g_metadata}
            local_g_msgids = g_msgids(self.account_id, db_session,
                                      in_=candidate_msgids)

        ordered_uids = sorted(uids, key=int)
        full_download, imapuid_only = partition(known_locally, ordered_uids)
        if not imapuid_only:
            return full_download

        log.info('skipping already downloaded uids',
                 count=len(imapuid_only))
        # Messages are always downloaded via All Mail, and the matching
        # All Mail ImapUids are created at that time, so they must not be
        # created a second time when the dedupe runs on All Mail itself.
        selected_folder = crispin_client.selected_folder_name
        if selected_folder != crispin_client.folder_names()['all']:
            add_new_imapuids(crispin_client, remote_g_metadata,
                             self.syncmanager_lock, imapuid_only)
        return full_download
Exemple #34
0
    def __deduplicate_message_download(self, crispin_client, remote_g_metadata,
                                       uids):
        """
        Deduplicate message download using X-GM-MSGID.

        UIDs (sorted numerically) are partitioned on whether their
        X-GM-MSGID is already stored for this namespace. add_new_imapuids()
        is called for the `imapuid_only` group when it is non-empty.

        Returns
        -------
        list
            Deduplicated UIDs (the `full_download` half of the partition).

        """
        def has_local_message(uid):
            # True when the remote metadata maps this UID to a stored msgid.
            return (uid in remote_g_metadata and
                    remote_g_metadata[uid].msgid in local_g_msgids)

        with mailsync_session_scope() as db_session:
            remote_msgids = {remote_g_metadata[uid].msgid
                             for uid in uids if uid in remote_g_metadata}
            local_g_msgids = g_msgids(self.namespace_id, db_session,
                                      in_=remote_msgids)

        uids_in_order = sorted(uids, key=int)
        full_download, imapuid_only = partition(has_local_message,
                                                uids_in_order)
        if not imapuid_only:
            return full_download

        log.info('downloading new uids for existing messages',
                 count=len(imapuid_only))
        add_new_imapuids(crispin_client, remote_g_metadata,
                         self.syncmanager_lock, imapuid_only)
        return full_download
Exemple #35
0
    def __deduplicate_message_download(self, crispin_client, remote_g_metadata,
                                       uids):
        """
        Deduplicate message download using X-GM-MSGID.

        The numerically sorted UIDs are split by whether their X-GM-MSGID
        is already stored locally. When the `imapuid_only` group is
        non-empty and the selected folder is not the 'all' folder,
        add_new_imapuids() is called for that group.

        Returns
        -------
        list
            Deduplicated UIDs (the `full_download` half of the partition).

        """
        def msgid_is_stored(uid):
            # True when the remote metadata maps this UID to a stored msgid.
            if uid not in remote_g_metadata:
                return False
            return remote_g_metadata[uid].msgid in local_g_msgids

        with mailsync_session_scope() as db_session:
            msgids_to_check = {remote_g_metadata[uid].msgid
                               for uid in uids if uid in remote_g_metadata}
            local_g_msgids = g_msgids(self.account_id, db_session,
                                      in_=msgids_to_check)

        sorted_uids = sorted(uids, key=int)
        full_download, imapuid_only = partition(msgid_is_stored, sorted_uids)
        if imapuid_only:
            skipped = len(imapuid_only)
            log.info('skipping already downloaded uids',
                     count=skipped)
            # All Mail ImapUids are created together with the message
            # download (which always goes through All Mail), so nothing
            # has to be added when All Mail itself is being deduplicated.
            current_folder = crispin_client.selected_folder_name
            all_mail_folder = crispin_client.folder_names()['all']
            if current_folder != all_mail_folder:
                add_new_imapuids(crispin_client, remote_g_metadata,
                                 self.syncmanager_lock, imapuid_only)

        return full_download
Exemple #36
0
    def download_and_commit_uids(self, crispin_client, uids):
        """Fetch `uids`, create message objects for them and commit.

        Returns the number of uid objects that were created and committed,
        or 0 when nothing was fetched or nothing is left after
        deduplication. Created uid objects are also added to
        `self.saved_uids`.
        """
        fetched = crispin_client.uids(uids)
        if not fetched:
            return 0

        created = set()
        # Another green thread may already have downloaded part of this
        # batch, so the duplicate check happens while holding the lock.
        with self.syncmanager_lock, mailsync_session_scope() as db_session:
            fetched = self.__deduplicate_message_object_creation(
                db_session, fetched)
            if not fetched:
                return 0

            account = db_session.query(Account).get(self.account_id)
            folder = db_session.query(Folder).get(self.folder_id)
            for raw_message in fetched:
                uid = self.create_message(db_session, account, folder,
                                          raw_message)
                if uid is None:
                    continue
                db_session.add(uid)
                db_session.flush()
                created.add(uid)
            db_session.commit()

        self.saved_uids.update(created)
        return len(created)
Exemple #37
0
 def _run_impl(self):
     """Run state handlers in a loop until the state becomes 'finish'.

     Sets up the connection pool, loads the saved folder status and
     publishes the initial state. Each pass then calls the handler for the
     current state and publishes the new state; a UidInvalid appends
     ' uidinvalid' to the state instead. Every state change is written to
     the saved folder status row.
     """
     # The pool is created here rather than earlier so that errors while
     # initializing it (remote server 503s or similar) lead to a retry.
     self.conn_pool = _pool(self.account_id)
     # Soft deletes are deliberately NOT ignored in the mail sync: handling
     # them gets complicated, e.g. when backends reuse imapids, and ImapUid
     # objects are the only objects the sync backends delete anyway.
     saved_folder_status = self._load_state()
     # Signal the sync status right away.
     self.sync_status.publish(state=self.state)
     # NOTE: the parent ImapSyncMonitor handler may kill us at any moment
     # on a shutdown command, which is equivalent to ctrl-c.
     while True:
         previous_state = self.state
         try:
             handler = self.state_handlers[previous_state]
             self.state = handler()
             self.sync_status.publish(state=self.state)
         except UidInvalid:
             self.state += ' uidinvalid'
         # Handlers are idempotent, so being killed between the end of the
         # handler and the commit below is harmless.
         if self.state != previous_state:
             # No re-query needed; the row refreshes when re-associated.
             with mailsync_session_scope() as db_session:
                 db_session.add(saved_folder_status)
                 saved_folder_status.state = self.state
                 db_session.commit()
         if self.state == 'finish':
             break
def test_sync_folder_deletes(db, default_account, folder_name_mapping):
    """Verify that folder deletion cascades to the IMAP bookkeeping rows.

    After the first save every folder gets an ImapFolderInfo and an
    ImapFolderSyncStatus row (7 of each are expected). Saving again with
    'extra' replaced by 'Jobslist' must leave six folders and six rows of
    each kind.
    """
    with mailsync_session_scope() as db_session:

        def count_of(model):
            # Rows of `model` that belong to the account under test.
            per_account = db_session.query(model).filter_by(account_id=default_account.id)
            return per_account.count()

        log = get_logger()
        save_folder_names(log, default_account.id, folder_name_mapping, db_session)
        existing_folders = db_session.query(Folder).filter_by(account_id=default_account.id)
        for existing in existing_folders:
            add_imap_status_info_rows(existing.id, default_account.id, db_session)
        db_session.commit()
        assert count_of(ImapFolderInfo) == 7
        assert count_of(ImapFolderSyncStatus) == 7

        # Replace the 'extra' folders with a single new one and save again.
        folder_name_mapping["extra"] = ["Jobslist"]
        save_folder_names(log, default_account.id, folder_name_mapping, db_session)
        remaining = db_session.query(Folder.name).filter(Folder.account_id == default_account.id)
        saved_folder_names = {row[0] for row in remaining}
        expected_names = {
            "Inbox",
            "[Gmail]/Spam",
            "[Gmail]/All Mail",
            "[Gmail]/Sent Mail",
            "[Gmail]/Drafts",
            "Jobslist",
        }
        assert saved_folder_names == expected_names
        assert count_of(ImapFolderInfo) == 6
        assert count_of(ImapFolderSyncStatus) == 6
Exemple #39
0
    def download_and_commit_uids(self, crispin_client, uids):
        """Download the given UIDs and persist the resulting uid objects.

        Returns how many uid objects were created and committed; 0 when the
        fetch returned nothing or deduplication left nothing to create. The
        created objects are merged into `self.saved_uids`.
        """
        batch = crispin_client.uids(uids)
        if not batch:
            return 0

        stored = set()
        # A different green thread might have downloaded some of these
        # messages in the meantime, hence the re-check inside the lock.
        with self.syncmanager_lock, mailsync_session_scope() as db_session:
            batch = self.__deduplicate_message_object_creation(
                db_session, batch)
            if not batch:
                return 0

            account = db_session.query(Account).get(self.account_id)
            folder = db_session.query(Folder).get(self.folder_id)
            for item in batch:
                uid = self.create_message(db_session, account, folder, item)
                if uid is None:
                    continue
                db_session.add(uid)
                db_session.flush()
                stored.add(uid)
            db_session.commit()

        self.saved_uids.update(stored)
        return len(stored)
Exemple #40
0
    def __init__(self, account_id, folder_name, folder_id, email_address,
                 provider_name, poll_frequency, syncmanager_lock,
                 refresh_flags_max, retry_fail_classes):
        """Set up the sync engine for one folder of one account.

        Stores the given settings on the instance, reads `throttled` and the
        namespace id from the Account row, builds the state -> handler
        table, initializes the Greenlet base and publishes the provider and
        folder names through a SyncStatus.

        `email_address` is accepted but not used by this constructor.
        """
        self.account_id = account_id
        self.folder_name = folder_name
        self.folder_id = folder_id
        self.poll_frequency = poll_frequency
        self.syncmanager_lock = syncmanager_lock
        self.refresh_flags_max = refresh_flags_max
        self.retry_fail_classes = retry_fail_classes
        self.state = None
        self.provider_name = provider_name

        with mailsync_session_scope() as db_session:
            account = db_session.query(Account).get(self.account_id)
            self.throttled = account.throttled
            self.namespace_id = account.namespace.id
            assert self.namespace_id is not None, "namespace_id is None"

        self.state_handlers = {
            'initial': self.initial_sync,
            'initial uidinvalid': self.resync_uids,
            'poll': self.poll,
            'poll uidinvalid': self.resync_uids,
            # The other handlers are bound methods and are therefore called
            # without arguments. A lambda that requires a positional
            # argument would raise TypeError under that same call, so this
            # one accepts (and ignores) any positional arguments.
            'finish': lambda *_args: 'finish',
        }

        Greenlet.__init__(self)

        self.sync_status = SyncStatus(self.account_id, self.folder_id)
        self.sync_status.publish(provider_name=self.provider_name,
                                 folder_name=self.folder_name)
Exemple #41
0
 def _run_impl(self):
     """Run state handlers in a loop until the state becomes 'finish'.

     Sets up the connection pool and loads the saved folder status. Each
     pass calls the handler for the current state; a UidInvalid appends
     ' uidinvalid' to the state instead. In both cases the resulting state
     is published through the heartbeat status, and every state change is
     written to the saved folder status row.
     """
     # The pool is created here rather than earlier so that errors while
     # initializing it (remote server 503s or similar) lead to a retry.
     self.conn_pool = _pool(self.account_id)
     # Soft deletes are deliberately NOT ignored in the mail sync: handling
     # them gets complicated, e.g. when backends reuse imapids, and ImapUid
     # objects are the only objects the sync backends delete anyway.
     saved_folder_status = self._load_state()
     # NOTE: the parent ImapSyncMonitor handler may kill us at any moment
     # on a shutdown command, which is equivalent to ctrl-c.
     while True:
         previous_state = self.state
         try:
             handler = self.state_handlers[previous_state]
             self.state = handler()
             self.heartbeat_status.publish(state=self.state)
         except UidInvalid:
             self.state += ' uidinvalid'
             self.heartbeat_status.publish(state=self.state)
         # Handlers are idempotent, so being killed between the end of the
         # handler and the commit below is harmless.
         if self.state != previous_state:
             # No re-query needed; the row refreshes when re-associated.
             with mailsync_session_scope() as db_session:
                 db_session.add(saved_folder_status)
                 saved_folder_status.state = self.state
                 db_session.commit()
         if self.state == 'finish':
             break
Exemple #42
0
    def __deduplicate_message_download(self, crispin_client, remote_g_metadata,
                                       uids):
        """
        Deduplicate message download using X-GM-MSGID.

        The numerically sorted UIDs are split by whether their X-GM-MSGID
        is already stored for this namespace; add_new_imapuids() is called
        for the `imapuid_only` group when it is non-empty.

        Returns
        -------
        list
            Deduplicated UIDs (the `full_download` half of the partition).

        """
        def message_exists(uid):
            # True when the remote metadata maps this UID to a stored msgid.
            if uid not in remote_g_metadata:
                return False
            return remote_g_metadata[uid].msgid in local_g_msgids

        with mailsync_session_scope() as db_session:
            msgids_of_interest = {
                remote_g_metadata[uid].msgid
                for uid in uids
                if uid in remote_g_metadata
            }
            local_g_msgids = g_msgids(self.namespace_id, db_session,
                                      in_=msgids_of_interest)

        numeric_order = sorted(uids, key=int)
        full_download, imapuid_only = partition(message_exists, numeric_order)
        if not imapuid_only:
            return full_download

        log.info('downloading new uids for existing messages',
                 count=len(imapuid_only))
        add_new_imapuids(crispin_client, remote_g_metadata,
                         self.syncmanager_lock, imapuid_only)
        return full_download
Exemple #43
0
    def prepare_sync(self):
        """
        Save the remote folder listing if it changed and resolve the folders
        to sync.

        The listing from crispin_client.folders() is saved (and remembered in
        `self.saved_remote_folders`) only when it differs from the
        remembered one. Returns a list of (folder_name, folder_id) tuples,
        in the order reported by crispin_client.sync_folders(). Raises
        MailsyncError when a folder to sync has no Folder row for this
        account.
        """
        with mailsync_session_scope() as db_session:
            with connection_pool(self.account_id).get() as crispin_client:
                # Fresh folder listing straight from the remote.
                remote_folders = crispin_client.folders()
                listing_changed = self.saved_remote_folders != remote_folders
                if listing_changed:
                    self.save_folder_names(db_session, remote_folders)
                    self.saved_remote_folders = remote_folders
                # Folders we are expected to sync.
                sync_folders = crispin_client.sync_folders()

            names_and_ids = []
            for folder_name in sync_folders:
                id_query = db_session.query(Folder.id).filter(
                    Folder.name == folder_name,
                    Folder.account_id == self.account_id)
                try:
                    folder_id, = id_query.one()
                except NoResultFound:
                    log.error('Missing Folder object when starting sync',
                              folder_name=folder_name)
                    message = "Missing Folder '{}' on account {}".format(
                        folder_name, self.account_id)
                    raise MailsyncError(message)
                names_and_ids.append((folder_name, folder_id))
            return names_and_ids
Exemple #44
0
    def download_and_commit_uids(self, crispin_client, uids):
        """Fetch `uids`, create and commit message objects, report metrics.

        Returns the number of uid objects created and committed, or 0 when
        the fetch returned nothing. While the state is 'initial' and at
        least one uid was created, the message velocity is reported; the
        first-message report is sent once, guarded by
        `self.is_first_message`.
        """
        started_at = datetime.utcnow()
        fetched = crispin_client.uids(uids)
        if not fetched:
            return 0

        created = set()
        # Another green thread may already have downloaded part of this
        # batch, which is why the work is done while holding the lock.
        with self.syncmanager_lock, mailsync_session_scope() as db_session:
            account = db_session.query(Account).get(self.account_id)
            folder = db_session.query(Folder).get(self.folder_id)
            for raw_message in fetched:
                uid = self.create_message(db_session, account, folder,
                                          raw_message)
                if uid is None:
                    continue
                db_session.add(uid)
                db_session.flush()
                created.add(uid)
            db_session.commit()

        created_count = len(created)
        # Message velocity (#uid / latency) is recorded only if something
        # was actually downloaded.
        if self.state == 'initial' and created_count:
            elapsed = datetime.utcnow() - started_at
            self._report_message_velocity(elapsed, created_count)
        if self.is_first_message:
            self._report_first_message()
            self.is_first_message = False

        return created_count
Exemple #45
0
    def download_and_commit_uids(self, crispin_client, uids):
        """Download the given UIDs, persist them and report sync metrics.

        Returns how many uid objects were created and committed; 0 when the
        fetch returned nothing. Message velocity is reported when the state
        is 'initial' and something was created, and the first-message
        report fires once, controlled by `self.is_first_message`.
        """
        begin = datetime.utcnow()
        batch = crispin_client.uids(uids)
        if not batch:
            return 0

        stored = set()
        # A different green thread might have downloaded some of these
        # messages in the meantime, so everything happens inside the lock.
        with self.syncmanager_lock, mailsync_session_scope() as db_session:
            account = db_session.query(Account).get(self.account_id)
            folder = db_session.query(Folder).get(self.folder_id)
            for item in batch:
                uid = self.create_message(db_session, account, folder,
                                          item)
                if uid is None:
                    continue
                db_session.add(uid)
                db_session.flush()
                stored.add(uid)
            db_session.commit()

        stored_count = len(stored)
        # Record message velocity (#uid / latency) if uids were downloaded.
        if self.state == 'initial' and stored_count:
            latency = datetime.utcnow() - begin
            self._report_message_velocity(latency, stored_count)
        if self.is_first_message:
            self._report_first_message()
            self.is_first_message = False

        return stored_count
Exemple #46
0
    def __init__(self, account_id, folder_name, folder_id, email_address,
                 provider_name, poll_frequency, syncmanager_lock,
                 refresh_flags_max, retry_fail_classes):
        """Set up the sync engine for one folder of one account.

        Binds the logging context, stores the given settings on the
        instance, reads `throttled` and the namespace id from the Account
        row, builds the state -> handler table, initializes the Greenlet
        base and creates the HeartbeatStatusProxy for this folder.
        """
        bind_context(self, 'foldersyncengine', account_id, folder_id)
        self.account_id = account_id
        self.folder_name = folder_name
        self.folder_id = folder_id
        self.poll_frequency = poll_frequency
        self.syncmanager_lock = syncmanager_lock
        self.refresh_flags_max = refresh_flags_max
        self.retry_fail_classes = retry_fail_classes
        self.state = None
        self.provider_name = provider_name

        with mailsync_session_scope() as db_session:
            account = db_session.query(Account).get(self.account_id)
            self.throttled = account.throttled
            self.namespace_id = account.namespace.id
            assert self.namespace_id is not None, "namespace_id is None"

        self.state_handlers = {
            'initial': self.initial_sync,
            'initial uidinvalid': self.resync_uids,
            'poll': self.poll,
            'poll uidinvalid': self.resync_uids,
            # The other handlers are bound methods and are therefore called
            # without arguments. A lambda that requires a positional
            # argument would raise TypeError under that same call, so this
            # one accepts (and ignores) any positional arguments.
            'finish': lambda *_args: 'finish',
        }

        Greenlet.__init__(self)

        self.heartbeat_status = HeartbeatStatusProxy(self.account_id,
                                                     self.folder_id,
                                                     self.folder_name,
                                                     email_address,
                                                     self.provider_name)
Exemple #47
0
    def prepare_sync(self):
        """Create canonical tags, save folder names and resolve sync folders.

        Calls Tag.create_canonical_tags() for the account's namespace, saves
        the folder names reported by the IMAP client through
        save_folder_names(), then looks up the Folder id for each folder to
        sync.

        Returns a list of (folder_name, folder_id) tuples, in the order
        reported by crispin_client.sync_folders(). Raises MailsyncError when
        a folder to sync has no Folder row for this account.
        """
        with mailsync_session_scope() as db_session:
            account = db_session.query(ImapAccount).get(self.account_id)
            Tag.create_canonical_tags(account.namespace, db_session)
            with _pool(self.account_id).get() as crispin_client:
                sync_folders = crispin_client.sync_folders()
                remote_names = crispin_client.folder_names()
                save_folder_names(log, self.account_id,
                                  remote_names, db_session)

            names_and_ids = []
            for folder_name in sync_folders:
                id_query = db_session.query(Folder.id).filter(
                    Folder.name == folder_name,
                    Folder.account_id == self.account_id)
                try:
                    folder_id, = id_query.one()
                except NoResultFound:
                    log.error("Missing Folder object when starting sync",
                              folder_name=folder_name)
                    raise MailsyncError("Missing Folder '{}' on account {}"
                                        .format(folder_name, self.account_id))
                names_and_ids.append((folder_name, folder_id))
            return names_and_ids
Exemple #48
0
    def initial_sync_impl(self, crispin_client, local_uids,
                          uid_download_stack,
                          spawn_flags_refresh_poller=True):
        """Run the initial sync of the selected folder.

        Removes local UIDs that were deleted, pushes the UIDs missing
        locally onto `uid_download_stack`, records the UID counts, spawns
        the new-UID poller (and, if requested, the flags refresh poller)
        and downloads the stacked UIDs. Any poller that was spawned is
        killed when this method exits, normally or not.
        """
        # The pollers start out as None so the cleanup below can tell
        # whether they were ever spawned; they must be killed whenever this
        # greenlet is interrupted.
        new_uid_poller = None
        flags_refresh_poller = None
        try:
            assert crispin_client.selected_folder_name == self.folder_name

            remote_uids = crispin_client.all_uids()
            log.info(remote_uid_count=len(remote_uids))
            log.info(local_uid_count=len(local_uids))

            with self.syncmanager_lock:
                log.debug("imap_initial_sync acquired syncmanager_lock")
                with mailsync_session_scope() as db_session:
                    deleted_uids = self.remove_deleted_uids(
                        db_session, local_uids, remote_uids)

            local_uids = set(local_uids) - deleted_uids
            new_uids = set(remote_uids) - local_uids
            add_uids_to_stack(new_uids, uid_download_stack)

            # download_uid_count is the initial size of the download queue;
            # update_uid_count is set by __imap_flag_change_poller(), which
            # is where flags get updated.
            uid_counts = dict(remote_uid_count=len(remote_uids),
                              download_uid_count=len(new_uids),
                              delete_uid_count=len(deleted_uids))
            with mailsync_session_scope() as db_session:
                self.update_uid_counts(db_session, **uid_counts)

            new_uid_poller = spawn(self.check_new_uids, uid_download_stack)
            if spawn_flags_refresh_poller:
                flags_refresh_poller = spawn(self.__imap_flag_change_poller)

            self.download_uids(crispin_client, uid_download_stack)
        finally:
            # flags_refresh_poller is only non-None when it was requested.
            for poller in (new_uid_poller, flags_refresh_poller):
                if poller is not None:
                    poller.kill()
Exemple #49
0
    def initial_sync_impl(self,
                          crispin_client,
                          local_uids,
                          uid_download_stack,
                          spawn_flags_refresh_poller=True):
        """Run the initial sync of the selected folder.

        Removes local UIDs that were deleted, pushes the UIDs missing
        locally onto `uid_download_stack`, records the UID counts, spawns
        the new-UID poller (and, if requested, the flags refresh poller)
        and downloads the stacked UIDs. Any poller that was spawned is
        killed when this method exits, normally or not.
        """
        # We wrap the block in a try/finally because the greenlets like
        # new_uid_poller need to be killed when this greenlet is interrupted.
        # Both pollers start out as None: if anything fails before they are
        # spawned, the cleanup must skip them rather than raise
        # UnboundLocalError and hide the original error.
        new_uid_poller, flags_refresh_poller = None, None
        try:
            assert crispin_client.selected_folder_name == self.folder_name

            remote_uids = crispin_client.all_uids()
            log.info(remote_uid_count=len(remote_uids))
            log.info(local_uid_count=len(local_uids))

            with self.syncmanager_lock:
                log.debug("imap_initial_sync acquired syncmanager_lock")
                with mailsync_session_scope() as db_session:
                    deleted_uids = self.remove_deleted_uids(
                        db_session, local_uids, remote_uids)

            local_uids = set(local_uids) - deleted_uids

            new_uids = set(remote_uids) - local_uids
            add_uids_to_stack(new_uids, uid_download_stack)

            with mailsync_session_scope() as db_session:
                self.update_uid_counts(
                    db_session,
                    remote_uid_count=len(remote_uids),
                    # This is the initial size of our download_queue
                    download_uid_count=len(new_uids),
                    # Flags are updated in __imap_flag_change_poller() and
                    # update_uid_count is set there
                    delete_uid_count=len(deleted_uids))

            new_uid_poller = spawn(self.check_new_uids, uid_download_stack)

            if spawn_flags_refresh_poller:
                flags_refresh_poller = spawn(self.__imap_flag_change_poller)

            self.download_uids(crispin_client, uid_download_stack)

        finally:
            if new_uid_poller is not None:
                new_uid_poller.kill()

            if flags_refresh_poller is not None:
                flags_refresh_poller.kill()
Exemple #50
0
    def resync_uids_impl(self):
        """ Rebuild this folder's imapuid rows after a UIDVALIDITY change.

        Compares the UIDVALIDITY reported for the selected folder with the
        value cached in ImapFolderInfo. If the remote value is not greater
        than the cached one, nothing is done. Otherwise every ImapUid row of
        this account/folder is deleted, each remote message is matched back
        to an already stored message by the SHA-256 of its body (or
        downloaded when there is no match), stored messages that no remote
        message matched are deleted, and the new UIDVALIDITY is recorded
        with highestmodseq reset to None.
        """
        # NOTE: first, let's check if the UIDVALIDITY change was spurious, if
        # it is, just discard it and go on, if it isn't, drop the relevant
        # entries (filtering by account and folder IDs) from the imapuid table,
        # download messages, if necessary - in case a message has changed UID -
        # update UIDs, and discard orphaned messages. -siro
        with mailsync_session_scope() as db_session:
            folder_info = db_session.query(ImapFolderInfo). \
                filter_by(account_id=self.account_id,
                          folder_id=self.folder_id).one()
            cached_uidvalidity = folder_info.uidvalidity
            with self.conn_pool.get() as crispin_client:
                # The callback accepts any arguments and always returns True.
                # NOTE(review): presumably this keeps select_folder from
                # rejecting the changed UIDVALIDITY - confirm against
                # select_folder's contract.
                crispin_client.select_folder(self.folder_name,
                                             lambda *args: True)
                uidvalidity = crispin_client.selected_uidvalidity
                if uidvalidity <= cached_uidvalidity:
                    log.debug('UIDVALIDITY unchanged')
                    return
                # Query (not yet a list) of every imapuid row of this folder;
                # it is iterated twice below.
                invalid_uids = db_session.query(ImapUid). \
                    filter_by(account_id=self.account_id,
                              folder_id=self.folder_id)
                # Body hash -> stored message, used to recognise remote
                # messages we already have under a different UID.
                data_sha256_message = {uid.message.data_sha256: uid.message
                                       for uid in invalid_uids}
                for uid in invalid_uids:
                    db_session.delete(uid)
                # NOTE: this is necessary (and OK since it doesn't persist any
                # data) to maintain the order between UIDs deletion and
                # insertion. Without this, I was seeing constraints violation
                # on the imapuid table. -siro
                db_session.flush()
                remote_uids = crispin_client.all_uids()
                for remote_uid in remote_uids:
                    # Messages are fetched one UID at a time.
                    raw_message = crispin_client.uids([remote_uid])[0]
                    data_sha256 = sha256(raw_message.body).hexdigest()
                    if data_sha256 in data_sha256_message:
                        message = data_sha256_message[data_sha256]

                        # Create a new imapuid
                        uid = ImapUid(msg_uid=raw_message.uid,
                                      message_id=message.id,
                                      account_id=self.account_id,
                                      folder_id=self.folder_id)
                        uid.update_flags(raw_message.flags)
                        db_session.add(uid)

                        # Update the existing message's metadata too
                        common.update_message_metadata(db_session, uid)

                        # Each stored message is matched at most once; what
                        # is left in the dict afterwards is deleted below.
                        del data_sha256_message[data_sha256]
                    else:
                        # No stored message has this body hash: download it.
                        self.download_and_commit_uids(crispin_client,
                                                      [remote_uid])
                    self.heartbeat_status.publish()
                    # FIXME: do we want to throttle the account when recovering
                    # from UIDVALIDITY changes? -siro
            # Stored messages that no remote message matched are removed.
            # NOTE(review): dict.itervalues() only exists on Python 2.
            for message in data_sha256_message.itervalues():
                db_session.delete(message)
            folder_info.uidvalidity = uidvalidity
            folder_info.highestmodseq = None
Exemple #51
0
    def sync(self):
        """ Start the per-folder sync engines for this account, one by one.

        Folder names are saved and canonical tags created first. Then, for
        every folder to sync whose saved state is not 'finish', an engine is
        started and this method waits until that engine is polling, has
        finished, or its greenlet is ready before moving on to the next
        folder. Finally it joins all folder monitors.

        Raises MailsyncError when a folder to sync has no Folder row.
        """
        with mailsync_session_scope() as db_session:
            with _pool(self.account_id).get() as crispin_client:
                sync_folders = crispin_client.sync_folders()
                account = db_session.query(ImapAccount).get(self.account_id)
                remote_folder_names = crispin_client.folder_names()
                save_folder_names(log, account, remote_folder_names,
                                  db_session)
            Tag.create_canonical_tags(account.namespace, db_session)

            # Folder name -> folder id for this account.
            folder_rows = db_session.query(Folder.id, Folder.name).filter_by(
                account_id=self.account_id)
            folder_id_for = {}
            for row_id, row_name in folder_rows:
                folder_id_for[row_name] = row_id

            # Folder name -> last saved sync state for this account.
            state_rows = db_session.query(
                Folder.name, ImapFolderSyncStatus.state).join(
                    ImapFolderSyncStatus.folder).filter(
                        ImapFolderSyncStatus.account_id == self.account_id)
            saved_states = {}
            for row_name, row_state in state_rows:
                saved_states[row_name] = row_state

        for folder_name in sync_folders:
            if folder_name not in folder_id_for:
                log.error("Missing Folder object when starting sync",
                          folder_name=folder_name,
                          folder_id_for=folder_id_for)
                raise MailsyncError("Missing Folder '{}' on account {}".format(
                    folder_name, self.account_id))

            if saved_states.get(folder_name) == 'finish':
                # Nothing to start for this folder.
                continue

            log.info('initializing folder sync')
            # STOPSHIP(emfree): replace by appropriate base class.
            thread = self.sync_engine_class(
                self.account_id, folder_name, folder_id_for[folder_name],
                self.email_address, self.provider_name,
                self.poll_frequency, self.syncmanager_lock,
                self.refresh_flags_max, self.retry_fail_classes)
            thread.start()
            self.folder_monitors.add(thread)

            # Hold off on the next folder until this engine is polling,
            # finished, or its greenlet is done.
            while not (self._thread_polling(thread) or
                       self._thread_finished(thread) or
                       thread.ready()):
                sleep(self.heartbeat)

            # Allow individual folder sync monitors to shut themselves down
            # after completing the initial sync.
            if self._thread_finished(thread) or thread.ready():
                log.info('folder sync finished/killed',
                         folder_name=thread.folder_name)
                # NOTE: Greenlet is automatically removed from the group.

        self.folder_monitors.join()
Exemple #52
0
    def check_new_uids(self, uid_download_stack):
        """ Poll the remote folder and keep the download stack up to date.

        On every pass the remote UID list is compared with the local UIDs
        plus the UIDs already waiting on the stack. Local UIDs that are gone
        from the remote are removed, stacked UIDs that are gone from the
        remote are dropped, and remote UIDs seen for the first time are
        added. The stack is then rewritten sorted by integer value and the
        UID counts are updated.

        A separate IMAP connection is taken from the pool so this poller does
        not interfere with whatever the main greenlet is doing.

        Never returns; meant to run in a greenlet until it is killed.
        """
        log.info("starting new UID-check poller")
        with self.conn_pool.get() as crispin_client:
            on_uidvalidity = uidvalidity_cb(crispin_client.account_id)
            crispin_client.select_folder(self.folder_name, on_uidvalidity)
            while True:
                remote_uids = set(crispin_client.all_uids())
                # Hold the lock so no messages are created while the queue
                # is being brought into a consistent state.
                with self.syncmanager_lock, \
                        mailsync_session_scope() as db_session:
                    local_uids = common.all_uids(self.account_id,
                                                 db_session,
                                                 self.folder_name)
                    pending_uids = set(uid_download_stack.queue)
                    known_uids = local_uids | pending_uids
                    deleted_uids = self.remove_deleted_uids(
                        db_session, local_uids, remote_uids)
                    log.info('remoted deleted uids',
                             count=len(deleted_uids))

                    # Keep only the stacked UIDs that still exist remotely...
                    refreshed_stack = set(
                        queued for queued in uid_download_stack.queue
                        if queued in remote_uids)
                    # ...and add the remote UIDs we have never seen.
                    refreshed_stack.update(
                        remote for remote in remote_uids
                        if remote not in known_uids)
                    uid_download_stack.queue = sorted(refreshed_stack,
                                                      key=int)

                    remote_total = len(remote_uids)
                    queued_total = uid_download_stack.qsize()
                    deleted_total = len(deleted_uids)
                    self.update_uid_counts(db_session,
                                           remote_uid_count=remote_total,
                                           download_uid_count=queued_total,
                                           delete_uid_count=deleted_total)
                sleep(self.poll_frequency)
Exemple #53
0
 def resync_uids_impl(self):
     """ Rebuild this folder's imapuid rows after a UIDVALIDITY change.

     Compares the UIDVALIDITY reported for the selected folder with the
     value cached in ImapFolderInfo. If the remote value is not greater
     than the cached one, nothing is done. Otherwise every ImapUid row of
     this account/folder is deleted, each remote message is matched back to
     an already stored message by the SHA-256 of its body (or downloaded
     when there is no match), stored messages that no remote message
     matched are deleted, and the new UIDVALIDITY is recorded with
     highestmodseq reset to None.
     """
     # NOTE: first, let's check if the UIDVALIDITY change was spurious, if
     # it is, just discard it and go on, if it isn't, drop the relevant
     # entries (filtering by account and folder IDs) from the imapuid table,
     # download messages, if necessary - in case a message has changed UID -
     # update UIDs, and discard orphaned messages. -siro
     with mailsync_session_scope() as db_session:
         folder_info = db_session.query(ImapFolderInfo). \
             filter_by(account_id=self.account_id,
                       folder_id=self.folder_id).one()
         cached_uidvalidity = folder_info.uidvalidity
         with self.conn_pool.get() as crispin_client:
             # The callback accepts any arguments and always returns True.
             # NOTE(review): presumably this keeps select_folder from
             # rejecting the changed UIDVALIDITY - confirm against
             # select_folder's contract.
             crispin_client.select_folder(self.folder_name,
                                          lambda *args: True)
             uidvalidity = crispin_client.selected_uidvalidity
             if uidvalidity <= cached_uidvalidity:
                 log.debug('UIDVALIDITY unchanged')
                 return
             # Query (not yet a list) of every imapuid row of this folder;
             # it is iterated twice below.
             invalid_uids = db_session.query(ImapUid). \
                 filter_by(account_id=self.account_id,
                           folder_id=self.folder_id)
             # Body hash -> stored message, used to recognise remote
             # messages we already have under a different UID.
             data_sha256_message = {
                 uid.message.data_sha256: uid.message
                 for uid in invalid_uids
             }
             for uid in invalid_uids:
                 db_session.delete(uid)
             # NOTE: this is necessary (and OK since it doesn't persist any
             # data) to maintain the order between UIDs deletion and
             # insertion. Without this, I was seeing constraints violation
             # on the imapuid table. -siro
             db_session.flush()
             remote_uids = crispin_client.all_uids()
             for remote_uid in remote_uids:
                 # Messages are fetched one UID at a time.
                 raw_message = crispin_client.uids([remote_uid])[0]
                 data_sha256 = sha256(raw_message.body).hexdigest()
                 if data_sha256 in data_sha256_message:
                     message = data_sha256_message[data_sha256]
                     # Re-attach the stored message under its new UID and
                     # carry over the remote flags and labels.
                     uid = ImapUid(msg_uid=raw_message.uid,
                                   message_id=message.id,
                                   account_id=self.account_id,
                                   folder_id=self.folder_id)
                     uid.update_flags_and_labels(raw_message.flags,
                                                 raw_message.g_labels)
                     db_session.add(uid)
                     # Each stored message is matched at most once; what is
                     # left in the dict afterwards is deleted below.
                     del data_sha256_message[data_sha256]
                 else:
                     # No stored message has this body hash: download it.
                     self.download_and_commit_uids(crispin_client,
                                                   self.folder_name,
                                                   [remote_uid])
                 self.heartbeat_status.publish()
                 # FIXME: do we want to throttle the account when recovering
                 # from UIDVALIDITY changes? -siro
         # Stored messages that no remote message matched are removed.
         # NOTE(review): dict.itervalues() only exists on Python 2.
         for message in data_sha256_message.itervalues():
             db_session.delete(message)
         folder_info.uidvalidity = uidvalidity
         folder_info.highestmodseq = None
Exemple #54
0
 def check_uid_changes(self, crispin_client, download_stack,
                       async_download):
     """ Reconcile local UIDs with the remote folder and refresh metadata.

     Remote UIDs that are neither stored locally nor already on
     `download_stack` are put on the stack in ascending order, and local
     UIDs missing from the remote are removed. Unless `async_download` is
     true, the stacked UIDs are downloaded right away and the UID counts
     are updated. Finally the metadata of the highest
     `self.refresh_flags_max` UIDs present both locally and remotely is
     refreshed.
     """
     remote_uids = set(crispin_client.all_uids())
     with self.syncmanager_lock, mailsync_session_scope() as db_session:
         local_uids = common.all_uids(self.account_id, db_session,
                                      self.folder_id)
         # Anything stored locally or already queued needs no download.
         already_tracked = local_uids | set(download_stack.keys())
         to_queue = [candidate for candidate in sorted(remote_uids)
                     if candidate not in already_tracked]
         for candidate in to_queue:
             download_stack.put(candidate, None)
         self.remove_deleted_uids(db_session, local_uids, remote_uids)

     if not async_download:
         self.download_uids(crispin_client, download_stack)
         with mailsync_session_scope() as db_session:
             remote_total = len(remote_uids)
             queued_total = len(download_stack)
             self.update_uid_counts(db_session,
                                    remote_uid_count=remote_total,
                                    download_uid_count=queued_total)

     shared_uids = sorted(remote_uids & local_uids)
     to_refresh = shared_uids[-self.refresh_flags_max:]
     self.update_metadata(crispin_client, to_refresh)
Exemple #55
0
 def download_and_commit_uids(self, crispin_client, folder_name, uids):
     """ Download the given UIDs, store them and return how many were created.

     `folder_name` is passed through to create_db_objects and is not
     necessarily self.folder_name: Gmail messages, for example, are
     downloaded via the 'All Mail' folder.
     """
     downloaded = safe_download(crispin_client, uids)
     # Database objects are created and committed under the sync manager lock.
     with self.syncmanager_lock, mailsync_session_scope() as db_session:
         created_imapuids = create_db_objects(self.account_id, db_session,
                                              log, folder_name,
                                              downloaded,
                                              self.create_message)
         commit_uids(db_session, log, created_imapuids)
     created_count = len(created_imapuids)
     return created_count
Exemple #56
0
    def initial_sync(self):
        """ Run the initial sync of this folder and return the next state.

        Takes a connection from the pool, selects the folder, loads the UIDs
        already stored locally and hands everything to initial_sync_impl()
        together with a fresh LIFO download stack.

        Returns the string 'poll'.
        """
        with self.conn_pool.get() as crispin_client:
            download_stack = LifoQueue()
            account_id = crispin_client.account_id
            crispin_client.select_folder(self.folder_name,
                                         uidvalidity_cb(account_id))

            with mailsync_session_scope() as db_session:
                stored_uids = common.all_uids(crispin_client.account_id,
                                              db_session, self.folder_name)

            self.initial_sync_impl(crispin_client, stored_uids,
                                   download_stack)
        return 'poll'
def test_save_folder_names(db, folder_name_mapping):
    """ save_folder_names() stores a Folder row for each mapped folder. """
    with mailsync_session_scope() as db_session:
        log = get_logger()
        save_folder_names(log, ACCOUNT_ID, folder_name_mapping, db_session)

        # Collect the names of all folders now stored for the account.
        name_rows = db_session.query(Folder.name).filter(
            Folder.account_id == ACCOUNT_ID)
        saved_folder_names = set(row[0] for row in name_rows)

        expected_folder_names = {
            'Inbox', '[Gmail]/Spam', '[Gmail]/All Mail', '[Gmail]/Sent Mail',
            '[Gmail]/Drafts', 'Jobslist', 'Random'
        }
        assert saved_folder_names == expected_folder_names