Beispiel #1
0
    def highestmodseq_callback(self, crispin_client, new_uids, updated_uids,
                               download_stack, async_download):
        """Queue new and changed UIDs on `download_stack` for this folder.

        The new and updated UIDs are combined, their Gmail metadata is
        fetched, and the result is passed through the message deduplication
        step. What gets queued depends on the kind of folder being synced:

        * thread-expanded folders get a GMessage per UID for which both
          flags and metadata came back;
        * UID-download folders get the bare UID with no payload.

        Unless `async_download` is set, the queued items are downloaded
        before returning. Raises MailsyncError when the folder belongs to
        neither group.
        """
        log.debug('running highestmodseq callback')
        uids = new_uids + updated_uids
        g_metadata = crispin_client.g_metadata(uids)
        to_download = self.__deduplicate_message_download(
            crispin_client, g_metadata, uids)

        if self.folder_name not in thread_expand_folders(crispin_client):
            if self.folder_name not in uid_download_folders(crispin_client):
                raise MailsyncError('Unknown Gmail sync folder: {}'.format(
                    self.folder_name))
            for uid in sorted(to_download):
                download_stack.put(uid, None)
            if not async_download:
                self.download_uids(crispin_client, download_stack)
            return

        flags = crispin_client.flags(to_download)
        for uid in sorted(to_download):
            # IMAP will just return no data for a UID if it's disappeared
            # from the folder in the meantime, so such UIDs are skipped.
            if uid not in flags or uid not in g_metadata:
                continue
            message = GMessage(uid, g_metadata[uid], flags[uid].flags,
                               flags[uid].labels, False)
            download_stack.put(uid, message)

        if async_download:
            return

        # Need to select All Mail before doing thread expansion, and to
        # switch back to our own folder once the threads are downloaded.
        if not self.is_all_mail(crispin_client):
            all_mail = crispin_client.folder_names()['all']
            crispin_client.select_folder(all_mail, uidvalidity_cb)
        self.__download_queued_threads(crispin_client, download_stack)
        if not self.is_all_mail(crispin_client):
            crispin_client.select_folder(self.folder_name, uidvalidity_cb)
Beispiel #2
0
    def prepare_sync(self):
        """
        Refresh the saved folder list and resolve the folders to sync.

        Fetches the folder list from the IMAP backend and, when it differs
        from the list remembered on this object, saves it and remembers the
        new one. Returns a list of tuples (folder_name, folder_id), one per
        folder we want to sync, in the order given by
        crispin_client.sync_folders().

        Raises MailsyncError if a folder to sync has no Folder object for
        this account.
        """
        with session_scope(self.namespace_id) as db_session:
            with connection_pool(self.account_id).get() as crispin_client:
                # Always ask the remote for its current folders, but only
                # persist them when they differ from what we saved last.
                remote_folders = crispin_client.folders()
                folders_changed = self.saved_remote_folders != remote_folders
                if folders_changed:
                    self.save_folder_names(db_session, remote_folders)
                    self.saved_remote_folders = remote_folders
                # The folders we should be syncing
                sync_folders = crispin_client.sync_folders()

            names_and_ids = []
            for folder_name in sync_folders:
                id_query = db_session.query(Folder.id).filter(
                    Folder.name == folder_name,
                    Folder.account_id == self.account_id)
                try:
                    (folder_id,) = id_query.one()
                except NoResultFound:
                    log.error('Missing Folder object when starting sync',
                              folder_name=folder_name)
                    raise MailsyncError(
                        u"Missing Folder '{}' on account {}".format(
                            folder_name, self.account_id))
                names_and_ids.append((folder_name, folder_id))
            return names_and_ids
Beispiel #3
0
    def prepare_sync(self):
        """Save the backend's folder names and resolve the folders to sync.

        Passes the folder names reported by the IMAP backend to
        save_folder_names, then looks up the Folder id for every folder we
        want to sync. Returns a list of tuples (folder_name, folder_id), in
        the order given by crispin_client.sync_folders().

        Raises MailsyncError if a folder to sync has no Folder object for
        this account."""
        with mailsync_session_scope() as db_session:
            with _pool(self.account_id).get() as crispin_client:
                sync_folders = crispin_client.sync_folders()
                remote_names = crispin_client.folder_names()
                save_folder_names(log, self.account_id, remote_names,
                                  db_session)

            names_and_ids = []
            for folder_name in sync_folders:
                id_query = db_session.query(Folder.id).filter(
                    Folder.name == folder_name,
                    Folder.account_id == self.account_id)
                try:
                    (folder_id,) = id_query.one()
                except NoResultFound:
                    log.error("Missing Folder object when starting sync",
                              folder_name=folder_name)
                    raise MailsyncError(
                        "Missing Folder '{}' on account {}".format(
                            folder_name, self.account_id))
                names_and_ids.append((folder_name, folder_id))
            return names_and_ids
Beispiel #4
0
def gmail_highestmodseq_update(crispin_client, log, folder_name, new_uids,
                               updated_uids, syncmanager_lock):
    """Download the new and updated UIDs of `folder_name`.

    The two UID lists are combined, their Gmail metadata is fetched and the
    result is passed through deduplicate_message_download. For the inbox,
    a GMessage is queued for each remaining UID that has both flags and
    metadata, and the queue is handed to download_queued_threads. For the
    UID-download folders, the remaining UIDs are downloaded with
    download_queued_uids.

    Raises MailsyncError for any other folder.
    """
    uids = new_uids + updated_uids
    g_metadata = crispin_client.g_metadata(uids)
    to_download = deduplicate_message_download(
        crispin_client, log, syncmanager_lock, g_metadata, uids)

    if folder_name == crispin_client.folder_names()['inbox']:
        flags = crispin_client.flags(to_download)
        message_download_stack = LifoQueue()
        for uid in to_download:
            # IMAP will just return no data for a UID if it's disappeared
            # from the folder in the meantime, so such UIDs are skipped.
            if uid not in flags or uid not in g_metadata:
                continue
            message = GMessage(uid, g_metadata[uid], flags[uid].flags,
                               flags[uid].labels)
            message_download_stack.put(message)
        download_queued_threads(crispin_client, log, folder_name,
                                message_download_stack, syncmanager_lock)
        return

    if folder_name not in uid_download_folders(crispin_client):
        raise MailsyncError(
            'Unknown Gmail sync folder: {}'.format(folder_name))

    uid_download_stack = uid_list_to_stack(to_download)
    pending_count = uid_download_stack.qsize()
    download_queued_uids(crispin_client, log, folder_name,
                         uid_download_stack, 0, pending_count,
                         syncmanager_lock, gmail_download_and_commit_uids,
                         create_gmail_message)
Beispiel #5
0
    def sync(self):
        """ Start per-folder syncs. Only have one per-folder sync in the
            'initial' state at a time.

            Saves the remote folder names and creates the canonical tags,
            then starts a sync engine for every folder to sync whose saved
            state is not 'finish'. Each engine is waited on until it is
            polling, finished or ready before the next one is started.
            Finally blocks until all folder monitors have exited.

            Raises MailsyncError if a folder to sync has no Folder object.
        """
        with mailsync_session_scope() as db_session:
            with _pool(self.account_id).get() as crispin_client:
                sync_folders = crispin_client.sync_folders()
                account = db_session.query(ImapAccount).get(self.account_id)
                remote_names = crispin_client.folder_names()
                save_folder_names(log, account, remote_names, db_session)
            Tag.create_canonical_tags(account.namespace, db_session)

            # Folder name -> Folder id, for every folder of this account.
            folder_rows = db_session.query(Folder.id, Folder.name).filter_by(
                account_id=self.account_id)
            folder_id_for = {}
            for id_, name in folder_rows:
                folder_id_for[name] = id_

            # Folder name -> last saved sync state.
            state_rows = db_session.query(
                Folder.name, ImapFolderSyncStatus.state).join(
                    ImapFolderSyncStatus.folder).filter(
                        ImapFolderSyncStatus.account_id == self.account_id)
            saved_states = {}
            for name, state in state_rows:
                saved_states[name] = state

        for folder_name in sync_folders:
            if folder_name not in folder_id_for:
                log.error("Missing Folder object when starting sync",
                          folder_name=folder_name,
                          folder_id_for=folder_id_for)
                raise MailsyncError("Missing Folder '{}' on account {}".format(
                    folder_name, self.account_id))

            if saved_states.get(folder_name) == 'finish':
                continue

            log.info('initializing folder sync')
            # STOPSHIP(emfree): replace by appropriate base class.
            thread = self.sync_engine_class(
                self.account_id, folder_name, folder_id_for[folder_name],
                self.email_address, self.provider_name,
                self.poll_frequency, self.syncmanager_lock,
                self.refresh_flags_max, self.retry_fail_classes)
            thread.start()
            self.folder_monitors.add(thread)

            # Hold off on the next folder until this one has left the
            # initial state (or has gone away altogether).
            while not (self._thread_polling(thread) or
                       self._thread_finished(thread) or
                       thread.ready()):
                sleep(self.heartbeat)

            # Allow individual folder sync monitors to shut themselves down
            # after completing the initial sync.
            if self._thread_finished(thread) or thread.ready():
                log.info('folder sync finished/killed',
                         folder_name=thread.folder_name)
                # NOTE: Greenlet is automatically removed from the group.

        self.folder_monitors.join()
Beispiel #6
0
    def sync(self):
        """ Start per-folder syncs. Only have one per-folder sync in the
            'initial' state at a time.

            Saves the remote folder names and creates the canonical tags,
            then starts an ImapFolderSyncMonitor for every folder to sync
            whose saved state is not 'finish'. Each monitor is waited on
            until it is polling or finished before the next one is started.
            Finally blocks until all folder monitors have exited.

            Raises MailsyncError if a folder to sync has no Folder object.
        """
        with session_scope(ignore_soft_deletes=False) as db_session:
            with _pool(self.account_id).get() as crispin_client:
                sync_folders = crispin_client.sync_folders()
                account = db_session.query(ImapAccount).get(self.account_id)
                remote_names = crispin_client.folder_names()
                save_folder_names(self.log, account, remote_names,
                                  db_session)
            Tag.create_canonical_tags(account.namespace, db_session)

            # Folder name -> Folder id, for every folder of this account.
            folder_rows = db_session.query(Folder.id, Folder.name).filter_by(
                account_id=self.account_id)
            folder_id_for = {}
            for id_, name in folder_rows:
                folder_id_for[name] = id_

            # Folder name -> last saved sync state.
            state_rows = db_session.query(
                Folder.name, ImapFolderSyncStatus.state).join(
                    ImapFolderSyncStatus.folder).filter(
                        ImapFolderSyncStatus.account_id == self.account_id)
            saved_states = {}
            for name, state in state_rows:
                saved_states[name] = state

        for folder_name in sync_folders:
            if folder_name not in folder_id_for:
                self.log.error("Missing Folder object when starting sync",
                               folder_name=folder_name,
                               folder_id_for=folder_id_for)
                raise MailsyncError("Missing Folder '{}' on account {}".format(
                    folder_name, self.account_id))

            if saved_states.get(folder_name) == 'finish':
                continue

            self.log.info('initializing folder sync')
            thread = ImapFolderSyncMonitor(
                self.account_id, folder_name, folder_id_for[folder_name],
                self.email_address, self.provider_name, self.shared_state,
                self.folder_state_handlers, self.retry_fail_classes)
            thread.start()
            self.folder_monitors.add(thread)

            # Hold off on the next folder until this one has left the
            # initial state or has finished.
            while not (self._thread_polling(thread) or
                       self._thread_finished(thread)):
                sleep(self.heartbeat)

            # Allow individual folder sync monitors to shut themselves down
            # after completing the initial sync.
            if self._thread_finished(thread):
                self.log.info('folder sync finished')
                # NOTE: Greenlet is automatically removed from the group
                # after finishing.

        self.folder_monitors.join()
Beispiel #7
0
    def __init__(self, account_id, namespace_id, folder_name, email_address,
                 provider_name, syncmanager_lock, sync_signal):
        """Set up the sync engine for one folder of one account.

        Looks up the Folder matching `folder_name` and `account_id`, copies
        its id, canonical name and initial-sync markers onto the instance,
        then stores the remaining arguments and builds the table mapping
        each sync state to its handler.

        Raises MailsyncError when no matching Folder exists.
        """
        with session_scope(namespace_id) as db_session:
            folder_query = db_session.query(Folder).filter(
                Folder.name == folder_name,
                Folder.account_id == account_id)
            try:
                folder = folder_query.one()
            except NoResultFound:
                raise MailsyncError(
                    u"Missing Folder '{}' on account {}".format(
                        folder_name, account_id))

            # Read everything we need while the session is still open.
            self.folder_id = folder.id
            self.folder_role = folder.canonical_name
            # Metric flags for sync performance
            self.is_initial_sync = folder.initial_sync_end is None
            never_started = folder.initial_sync_start is None
            self.is_first_sync = self.is_first_message = never_started

        bind_context(self, 'foldersyncengine', account_id, self.folder_id)
        self.account_id = account_id
        self.namespace_id = namespace_id
        self.folder_name = folder_name
        self.email_address = email_address

        # The inbox gets its own poll frequency; all other folders share
        # the default one.
        is_inbox = self.folder_name.lower() == 'inbox'
        self.poll_frequency = (INBOX_POLL_FREQUENCY if is_inbox
                               else DEFAULT_POLL_FREQUENCY)
        self.syncmanager_lock = syncmanager_lock
        self.state = None
        self.provider_name = provider_name
        self.last_fast_refresh = None
        self.flags_fetch_results = {}
        self.conn_pool = connection_pool(self.account_id)

        self.sync_signal = sync_signal

        # Sync state name -> method that handles it.
        self.state_handlers = {
            'initial': self.initial_sync,
            'initial uidinvalid': self.resync_uids,
            'poll': self.poll,
            'poll uidinvalid': self.resync_uids,
        }

        self.setup_heartbeats()
        Greenlet.__init__(self)

        # Some generic IMAP servers are throwing UIDVALIDITY
        # errors forever. Instead of resyncing those servers
        # ad vitam, we keep track of the number of consecutive
        # times we got such an error and bail out if it's higher than
        # MAX_UIDINVALID_RESYNCS.
        self.uidinvalid_count = 0
Beispiel #8
0
    def initial_sync_impl(self, crispin_client):
        """Run the initial sync of this folder over `crispin_client`.

        Steps, in order:

        1. Load the locally known UIDs and the remote UIDs, let
           remove_deleted_uids reconcile them and record the remote and
           to-download counts (under `syncmanager_lock`).
        2. Fetch Gmail metadata for the UIDs that are not known locally.
        3. Spawn `poll_for_changes` on a fresh download stack, then fill
           the stack and download:
           - UID-download folders: deduplicate, queue bare UIDs and
             download them;
           - thread-expanded folders: queue a GMessage per UID for which
             both flags and metadata are available, select 'All Mail' and
             download the queued threads.

        Raises MailsyncError if the folder is in neither group. The
        change poller, if it was started, is always killed on exit.
        """
        # We wrap the block in a try/finally because the greenlets like
        # change_poller need to be killed when this greenlet is interrupted
        change_poller = None
        try:
            with mailsync_session_scope() as db_session:
                local_uids = common.all_uids(self.account_id, db_session,
                                             self.folder_name)
            remote_uids = sorted(crispin_client.all_uids(), key=int)
            remote_uid_count = len(remote_uids)
            with self.syncmanager_lock:
                with mailsync_session_scope() as db_session:
                    deleted_uids = self.remove_deleted_uids(
                        db_session, local_uids, remote_uids)

                    local_uids = set(local_uids) - deleted_uids
                    unknown_uids = set(remote_uids) - local_uids
                    self.update_uid_counts(
                        db_session,
                        remote_uid_count=remote_uid_count,
                        download_uid_count=len(unknown_uids))

            remote_g_metadata = crispin_client.g_metadata(unknown_uids)
            download_stack = UIDStack()
            change_poller = spawn(self.poll_for_changes, download_stack)
            if self.folder_name in uid_download_folders(crispin_client):
                full_download = self.__deduplicate_message_download(
                    crispin_client, remote_g_metadata, unknown_uids)
                for uid in sorted(full_download):
                    download_stack.put(uid, None)
                self.download_uids(crispin_client, download_stack)
            elif self.folder_name in thread_expand_folders(crispin_client):
                flags = crispin_client.flags(unknown_uids)
                for uid in sorted(unknown_uids):
                    # IMAP will just return no data for a UID if it's
                    # disappeared from the folder in the meantime. Flags and
                    # metadata come from separate fetches, so require both
                    # rather than letting a missing metadata entry abort the
                    # whole sync with a KeyError.
                    if uid in flags and uid in remote_g_metadata:
                        gmessage = GMessage(uid,
                                            remote_g_metadata[uid],
                                            flags[uid].flags,
                                            flags[uid].labels,
                                            throttled=self.throttled)
                        download_stack.put(uid, gmessage)
                # We always download threads via the 'All Mail' folder.
                crispin_client.select_folder(
                    crispin_client.folder_names()['all'], uidvalidity_cb)
                self.__download_queued_threads(crispin_client, download_stack)
            else:
                raise MailsyncError('Unknown Gmail sync folder: {}'.format(
                    self.folder_name))
        finally:
            if change_poller is not None:
                change_poller.kill()
Beispiel #9
0
    def __init__(self, account_id, namespace_id, folder_name, email_address,
                 provider_name, syncmanager_lock):
        """Set up the sync engine for one folder of one account.

        Looks up the Folder matching `folder_name` and `account_id`, copies
        its id, canonical name and initial-sync markers onto the instance,
        then stores the remaining arguments, builds the table mapping each
        sync state to its handler and creates the heartbeat status proxy.

        Raises MailsyncError when no matching Folder exists.
        """
        with session_scope(namespace_id) as db_session:
            folder_query = db_session.query(Folder).filter(
                Folder.name == folder_name,
                Folder.account_id == account_id)
            try:
                folder = folder_query.one()
            except NoResultFound:
                raise MailsyncError(
                    u"Missing Folder '{}' on account {}".format(
                        folder_name, account_id))

            # Read everything we need while the session is still open.
            self.folder_id = folder.id
            self.folder_role = folder.canonical_name
            # Metric flags for sync performance
            self.is_initial_sync = folder.initial_sync_end is None
            never_started = folder.initial_sync_start is None
            self.is_first_sync = self.is_first_message = never_started

        bind_context(self, 'foldersyncengine', account_id, self.folder_id)
        self.account_id = account_id
        self.namespace_id = namespace_id
        self.folder_name = folder_name

        # The inbox gets its own poll frequency; all other folders share
        # the default one.
        is_inbox = self.folder_name.lower() == 'inbox'
        self.poll_frequency = (INBOX_POLL_FREQUENCY if is_inbox
                               else DEFAULT_POLL_FREQUENCY)
        self.syncmanager_lock = syncmanager_lock
        self.state = None
        self.provider_name = provider_name
        self.last_fast_refresh = None
        self.flags_fetch_results = {}
        self.conn_pool = connection_pool(self.account_id)

        # Sync state name -> method that handles it.
        self.state_handlers = {
            'initial': self.initial_sync,
            'initial uidinvalid': self.resync_uids,
            'poll': self.poll,
            'poll uidinvalid': self.resync_uids,
        }

        Greenlet.__init__(self)

        # Note that email_address is only handed to the proxy; it is not
        # stored on the instance.
        self.heartbeat_status = HeartbeatStatusProxy(
            self.account_id, self.folder_id, self.folder_name,
            email_address, self.provider_name)
Beispiel #10
0
    def highestmodseq_callback(self, crispin_client, new_uids, updated_uids):
        """Download the new and updated UIDs of this folder.

        The two UID lists are combined, their Gmail metadata is fetched and
        the result is passed through the message deduplication step. For
        the inbox, a GMessage is queued for each remaining UID that has
        both flags and metadata, and the queued threads are downloaded. For
        the UID-download folders, the remaining UIDs are downloaded
        directly.

        Raises MailsyncError for any other folder.
        """
        uids = new_uids + updated_uids
        g_metadata = crispin_client.g_metadata(uids)
        to_download = self.__deduplicate_message_download(
            crispin_client, g_metadata, uids)

        if self.folder_name == crispin_client.folder_names()['inbox']:
            flags = crispin_client.flags(to_download)
            message_download_stack = LifoQueue()
            for uid in to_download:
                # IMAP will just return no data for a UID if it's
                # disappeared from the folder in the meantime, so such UIDs
                # are skipped.
                if uid not in flags or uid not in g_metadata:
                    continue
                message = GMessage(uid, g_metadata[uid], flags[uid].flags,
                                   flags[uid].labels)
                message_download_stack.put(message)
            self.__download_queued_threads(crispin_client,
                                           message_download_stack)
            return

        if self.folder_name not in uid_download_folders(crispin_client):
            raise MailsyncError('Unknown Gmail sync folder: {}'.format(
                self.folder_name))

        uid_download_stack = uid_list_to_stack(to_download)
        self.download_uids(crispin_client, uid_download_stack)
Beispiel #11
0
    def initial_sync_impl(self, crispin_client, local_uids,
                          uid_download_stack):
        """Run the initial sync of this folder over `crispin_client`.

        Arguments:
            crispin_client: connection used for all remote calls.
            local_uids: UIDs already stored locally for this folder.
            uid_download_stack: stack that receives the UIDs to download
                when this is a UID-download folder.

        Fetches the remote metadata, reconciles local and remote UIDs via
        remove_deleted_uids and records the sync counts (under
        `syncmanager_lock`). It then spawns a poller greenlet and downloads
        either whole threads (inbox) or individual UIDs (UID-download
        folders). The cached metadata file is removed on success.

        Raises MailsyncError if the folder is neither the inbox nor a
        UID-download folder. The poller, if it was started, is always
        killed on exit.
        """
        # Bound before the try block so that the cleanup in `finally` is
        # safe when we fail before any poller has been spawned; otherwise
        # the resulting UnboundLocalError would mask the real exception.
        new_uid_poller = None
        # We wrap the block in a try/finally because the greenlets like
        # new_uid_poller need to be killed when this greenlet is interrupted
        try:
            remote_uid_count = len(set(crispin_client.all_uids()))
            remote_g_metadata, update_uid_count = self.__fetch_g_metadata(
                crispin_client, local_uids)
            remote_uids = sorted(remote_g_metadata.keys(), key=int)
            log.info(remote_uid_count=len(remote_uids))
            if self.folder_name == crispin_client.folder_names()['all']:
                log.info(local_uid_count=len(local_uids))

            with self.syncmanager_lock:
                log.debug('gmail_initial_sync grabbed syncmanager_lock')
                with mailsync_session_scope() as db_session:
                    deleted_uids = self.remove_deleted_uids(
                        db_session, local_uids, remote_uids)
                    delete_uid_count = len(deleted_uids)

                    local_uids = set(local_uids) - deleted_uids
                    unknown_uids = set(remote_uids) - local_uids

                    # Persist the num(messages) to sync (any type of sync:
                    # download, update or delete) before we start.  Note that
                    # num_local_deleted, num_local_updated ARE the numbers to
                    # delete/update too since we make those changes right
                    # away before we start downloading messages.
                    self.update_uid_counts(
                        db_session,
                        remote_uid_count=remote_uid_count,
                        download_uid_count=len(unknown_uids),
                        update_uid_count=update_uid_count,
                        delete_uid_count=delete_uid_count)

            if self.folder_name == crispin_client.folder_names()['inbox']:
                # We don't do an initial dedupe for Inbox because we do thread
                # expansion, which means even if we have a given msgid
                # downloaded, we might not have the whole thread. This means
                # that restarts cause duplicate work, but hopefully these
                # folders aren't too huge.
                message_download_stack = LifoQueue()
                flags = crispin_client.flags(unknown_uids)
                for uid in unknown_uids:
                    if uid in flags:
                        message_download_stack.put(
                            GMessage(uid, remote_g_metadata[uid],
                                     flags[uid].flags, flags[uid].labels))
                new_uid_poller = spawn(self.__check_new_g_thrids,
                                       message_download_stack)
                self.__download_queued_threads(crispin_client,
                                               message_download_stack)
            elif self.folder_name in uid_download_folders(crispin_client):
                full_download = self.__deduplicate_message_download(
                    crispin_client, remote_g_metadata, unknown_uids)
                add_uids_to_stack(full_download, uid_download_stack)
                new_uid_poller = spawn(self.check_new_uids, uid_download_stack)
                self.download_uids(crispin_client, uid_download_stack)
            else:
                raise MailsyncError('Unknown Gmail sync folder: {}'.format(
                    self.folder_name))

            # Complete X-GM-MSGID mapping is no longer needed after initial
            # sync.
            rm_cache(
                remote_g_metadata_cache_file(self.account_id,
                                             self.folder_name))
        finally:
            if new_uid_poller is not None:
                new_uid_poller.kill()
Beispiel #12
0
def gmail_initial_sync(crispin_client, log, folder_name, shared_state,
                       local_uids, uid_download_stack, msg_create_fn):
    """Run the initial sync of `folder_name` over `crispin_client`.

    Arguments:
        crispin_client: connection used for all remote calls.
        log: logger used for progress messages.
        folder_name: name of the folder being synced.
        shared_state: mapping; the 'syncmanager_lock' and 'poll_frequency'
            entries are read here.
        local_uids: UIDs already stored locally for this folder.
        uid_download_stack: stack that receives the UIDs to download when
            this is a UID-download folder.
        msg_create_fn: passed through to download_queued_uids.

    Fetches the remote metadata, reconciles local and remote UIDs via
    remove_deleted_uids and records the sync counts (under the syncmanager
    lock). It then spawns a poller greenlet and downloads either whole
    threads (inbox) or individual UIDs (UID-download folders). The cached
    metadata file is removed on success.

    Raises MailsyncError if the folder is neither the inbox nor a
    UID-download folder. The poller, if it was started, is always killed
    on exit, including when the download raises.
    """
    remote_uid_count = len(set(crispin_client.all_uids()))
    remote_g_metadata, update_uid_count = get_g_metadata(
        crispin_client, log, folder_name, local_uids,
        shared_state['syncmanager_lock'])
    remote_uids = sorted(remote_g_metadata.keys(), key=int)
    log.info(remote_uid_count=len(remote_uids))
    if folder_name == crispin_client.folder_names()['all']:
        log.info(local_uid_count=len(local_uids))

    with shared_state['syncmanager_lock']:
        log.debug('gmail_initial_sync grabbed syncmanager_lock')
        with session_scope(ignore_soft_deletes=False) as db_session:
            deleted_uids = remove_deleted_uids(
                crispin_client.account_id, db_session, log, folder_name,
                local_uids, remote_uids)
            delete_uid_count = len(deleted_uids)

            local_uids = set(local_uids) - deleted_uids
            unknown_uids = set(remote_uids) - local_uids

            # Persist the num(messages) to sync (any type of sync: download,
            # update or delete) before we start.
            # Note that num_local_deleted, num_local_updated ARE the numbers to
            # delete/update too since we make those changes right away before
            # we start downloading messages.
            update_uid_counts(db_session, log, crispin_client.account_id,
                              folder_name, remote_uid_count=remote_uid_count,
                              download_uid_count=len(unknown_uids),
                              update_uid_count=update_uid_count,
                              delete_uid_count=delete_uid_count)

    # The poller greenlet must be killed even if the download below raises
    # or this greenlet is interrupted, so the cleanup lives in a `finally`.
    new_uid_poller = None
    try:
        if folder_name == crispin_client.folder_names()['inbox']:
            # We don't do an initial dedupe for Inbox because we do thread
            # expansion, which means even if we have a given msgid
            # downloaded, we might not have the whole thread. This means
            # that restarts cause duplicate work, but hopefully these
            # folders aren't too huge.
            message_download_stack = LifoQueue()
            flags = crispin_client.flags(unknown_uids)
            for uid in unknown_uids:
                if uid in flags:
                    message_download_stack.put(
                        GMessage(uid, remote_g_metadata[uid],
                                 flags[uid].flags, flags[uid].labels))
            new_uid_poller = spawn(check_new_g_thrids,
                                   crispin_client.account_id,
                                   crispin_client.PROVIDER, folder_name, log,
                                   message_download_stack,
                                   shared_state['poll_frequency'],
                                   shared_state['syncmanager_lock'])
            download_queued_threads(crispin_client, log, folder_name,
                                    message_download_stack,
                                    shared_state['syncmanager_lock'])
        elif folder_name in uid_download_folders(crispin_client):
            full_download = deduplicate_message_download(
                crispin_client, log, shared_state['syncmanager_lock'],
                remote_g_metadata, unknown_uids)
            add_uids_to_stack(full_download, uid_download_stack)
            new_uid_poller = spawn(check_new_uids, crispin_client.account_id,
                                   folder_name,
                                   log, uid_download_stack,
                                   shared_state['poll_frequency'],
                                   shared_state['syncmanager_lock'])
            download_queued_uids(crispin_client, log, folder_name,
                                 uid_download_stack, len(local_uids),
                                 len(unknown_uids),
                                 shared_state['syncmanager_lock'],
                                 gmail_download_and_commit_uids,
                                 msg_create_fn)
        else:
            raise MailsyncError(
                'Unknown Gmail sync folder: {}'.format(folder_name))

        # Complete X-GM-MSGID mapping is no longer needed after initial
        # sync.
        rm_cache(remote_g_metadata_cache_file(crispin_client.account_id,
                                              folder_name))
    finally:
        if new_uid_poller is not None:
            new_uid_poller.kill()