Example #1
0
    def refresh_flags_impl(self, crispin_client, max_uids):
        crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)
        with session_scope(self.namespace_id) as db_session:
            local_uids = common.local_uids(account_id=self.account_id,
                                           session=db_session,
                                           folder_id=self.folder_id,
                                           limit=max_uids)

        flags = crispin_client.flags(local_uids)
        if (max_uids in self.flags_fetch_results and
                self.flags_fetch_results[max_uids] == (local_uids, flags)):
            # If the flags fetch response is exactly the same as the last one
            # we got, then we don't need to persist any changes.
            log.debug('Unchanged flags refresh response, '
                      'not persisting changes', max_uids=max_uids)
            return
        log.debug('Changed flags refresh response, persisting changes',
                  max_uids=max_uids)
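        # Any UID we have locally that didn't come back in the FLAGS response
        # has been expunged on the server.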
        expunged_uids = set(local_uids).difference(flags.keys())
        common.remove_deleted_uids(self.account_id, self.folder_id,
                                   expunged_uids)
        with session_scope(self.namespace_id) as db_session:
            common.update_metadata(self.account_id, self.folder_id,
                                   self.folder_role, flags, db_session)
        self.flags_fetch_results[max_uids] = (local_uids, flags)
Example #2
0
def get_g_metadata(crispin_client, log, folder_name, uids, syncmanager_lock):
    assert folder_name == crispin_client.selected_folder_name, \
        "crispin selected folder isn't as expected"
    account_id = crispin_client.account_id
    remote_g_metadata = None
    update_uid_count = 0

    with session_scope(ignore_soft_deletes=False) as db_session:
        saved_folder_info = account.get_folder_info(
            account_id, db_session, folder_name)
        saved_highestmodseq = or_none(saved_folder_info, lambda i:
                                      i.highestmodseq)
    if saved_highestmodseq is not None:
        # We have a cached highestmodseq, so we've probably synced this folder
        # before; try to reuse the saved metadata.
        remote_g_metadata, update_uid_count = retrieve_saved_g_metadata(
            crispin_client, log, folder_name, uids,
            saved_highestmodseq, syncmanager_lock)

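    # Either there was no cached state or it couldn't be reused; fetch
    # metadata for every UID in the folder and cache it.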
    if remote_g_metadata is None:
        remote_g_metadata = crispin_client.g_metadata(
            crispin_client.all_uids())
        set_cache(remote_g_metadata_cache_file(account_id, folder_name),
                  remote_g_metadata)
    # Save the highestmodseq that corresponds to the saved g_metadata.
    with session_scope(ignore_soft_deletes=False) as db_session:
        account.update_folder_info(account_id, db_session, folder_name,
                                   crispin_client.selected_uidvalidity,
                                   crispin_client.selected_highestmodseq)
        db_session.commit()

    return remote_g_metadata, update_uid_count
Example #3
0
    def _run_impl(self):
        self.log.info('Starting LabelRenameHandler',
                      label_name=self.label_name)

        with connection_pool(self.account_id).get() as crispin_client:
            folders_to_process = []
            with session_scope(self.account_id) as db_session:
                folders = db_session.query(Folder).filter(
                    Folder.account_id == self.account_id)

                # Keep only the primitive values we need so the ORM objects
                # can safely be detached from this session.
                folders_to_process = [(folder.id, folder.name)
                                      for folder in folders]
                db_session.expunge_all()

            for folder_id, folder_name in folders_to_process:
                crispin_client.select_folder(folder_name, uidvalidity_cb)

                found_uids = crispin_client.search_uids(
                    ['X-GM-LABELS', utf7_encode(self.label_name)])
                flags = crispin_client.flags(found_uids)

                self.log.info('Running metadata update for folder',
                              folder_name=folder_name)
                with session_scope(self.account_id) as db_session:
                    common.update_metadata(self.account_id, folder_id, flags,
                                           db_session)
                    db_session.commit()
Example #4
0
def remote_update_folder(crispin_client, account_id, category_id, old_name,
                         new_name):

    with session_scope(account_id) as db_session:
        account = db_session.query(Account).get(account_id)
        account_provider = account.provider

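    # Only generic IMAP providers need the name expanded into a full IMAP
    # path with the account's prefix and separator; Gmail and EAS use the
    # name as-is.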
    if account_provider not in ['gmail', 'eas']:
        new_display_name = imap_folder_path(
            new_name, separator=crispin_client.folder_separator,
            prefix=crispin_client.folder_prefix)
    else:
        new_display_name = new_name

    crispin_client.conn.rename_folder(old_name, new_display_name)

    # TODO @karim: Make the main sync loop detect folder renames
    # more accurately, and get rid of this.
    if new_display_name != old_name:
        with session_scope(account_id) as db_session:
            category = db_session.query(Category).get(category_id)
            category.display_name = new_display_name

            for folder in category.folders:
                if folder.name == old_name:
                    folder.name = new_display_name
Example #5
0
    def _run(self):
        with self.semaphore:
            log = logger.new(
                record_id=self.record_id, action_log_id=self.action_log_id,
                action=self.action_name, account_id=self.account_id,
                extra_args=self.extra_args)

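            # Try the action up to ACTION_MAX_NR_OF_RETRIES times, sleeping
            # for retry_interval seconds between attempts.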
            for _ in range(ACTION_MAX_NR_OF_RETRIES):
                try:
                    before_func = datetime.utcnow()
                    if self.extra_args:
                        self.func(self.account_id, self.record_id,
                                  self.extra_args)
                    else:
                        self.func(self.account_id, self.record_id)
                    after_func = datetime.utcnow()

                    with session_scope(self.account_id) as db_session:
                        action_log_entry = db_session.query(ActionLog).get(
                            self.action_log_id)
                        action_log_entry.status = 'successful'
                        db_session.commit()
                        latency = round((datetime.utcnow() -
                                         action_log_entry.created_at).
                                        total_seconds(), 2)
                        func_latency = round((after_func - before_func).
                                             total_seconds(), 2)
                        log.info('syncback action completed',
                                 action_id=self.action_log_id,
                                 latency=latency,
                                 process=self.parent_service.process_number,
                                 func_latency=func_latency)
                        self._log_to_statsd(action_log_entry.status, latency)
                        return
                except Exception:
                    log_uncaught_errors(log, account_id=self.account_id,
                                        provider=self.provider)
                    with session_scope(self.account_id) as db_session:
                        action_log_entry = db_session.query(ActionLog).get(
                            self.action_log_id)
                        action_log_entry.retries += 1
                        if (action_log_entry.retries ==
                                ACTION_MAX_NR_OF_RETRIES):
                            log.critical('Max retries reached, giving up.',
                                         exc_info=True)
                            action_log_entry.status = 'failed'
                            self._log_to_statsd(action_log_entry.status)
                            db_session.commit()
                            return
                        db_session.commit()

                # Wait before retrying
                log.info("Syncback worker retrying action after sleeping",
                         duration=self.retry_interval)

                # TODO(T6974): We might want to do some kind of exponential
                # backoff with jitter to avoid the thundering herd problem if a
                # provider suddenly starts having issues for a short period of
                # time.
                gevent.sleep(self.retry_interval)
Example #6
0
def test_password_auth(email, password):
    with session_scope() as db_session:
        create_account(db_session, email, password)

    start_time = time.time()

    # Check that the account exists
    while time.time() - start_time < TEST_MAX_DURATION_SECS:
        client = APIClient.from_email(email)[0]
        if client is not None:
            break
        time.sleep(TEST_GRANULARITY_CHECK_SECS)

    if client is None:
        assert False, "Account namespace should have been created"

    # Now, compute how much time it takes to start syncing the account
    start_time = time.time()
    got_messages = False
    while time.time() - start_time < TEST_MAX_DURATION_SECS:
        messages = client.get_messages()
        if len(messages) != 0:
            got_messages = True
            break
        time.sleep(TEST_GRANULARITY_CHECK_SECS)
    assert got_messages, "Messages should have been found"

    print "test_password_auth %s %f" % (email, time.time() - start_time)

    # remove the account
    with session_scope() as db_session:
        # remove_account(db_session, email)
        pass
Example #7
0
    def _run(self):
        """
        Index the threads and messages of all namespaces into Elasticsearch.

        """
        with session_scope() as db_session:
            pointer = db_session.query(SearchIndexCursor).first()
            self.transaction_pointer = pointer.transaction_id if pointer else 0

        self.log.info('Starting search-index service',
                      transaction_pointer=self.transaction_pointer)

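        # Poll for new message/thread transactions, index them in batches of
        # at most chunk_size, and advance the stored transaction pointer after
        # each batch.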
        while True:
            with session_scope() as db_session:
                transactions = db_session.query(Transaction). \
                    filter(Transaction.id > self.transaction_pointer,
                           or_(Transaction.object_type == 'message',
                               Transaction.object_type == 'thread')). \
                    order_by(asc(Transaction.id)). \
                    limit(self.chunk_size). \
                    options(joinedload(Transaction.namespace)).all()

                # TODO[k]: We ideally want to index chunk_size at a time.
                # This currently indexes <= chunk_size, and it varies each
                # time.
                if transactions:
                    self.index(transactions, db_session)
                    new_pointer = transactions[-1].id
                    self.update_pointer(new_pointer, db_session)
                else:
                    sleep(self.poll_interval)
                db_session.commit()
Example #8
0
    def _new_connection(self):
        from inbox.auth.base import handler_from_provider

        # Ensure that connections are initialized serially, so as not to use
        # many db sessions on startup.
        with self._new_conn_lock:
            auth_handler = handler_from_provider(self.provider_name)

            for retry_count in range(MAX_TRANSIENT_ERRORS):
                try:
                    conn = auth_handler.connect_account(self.email_address,
                                                        self.credential,
                                                        self.imap_endpoint,
                                                        self.account_id)

                    # If we can connect the account, then we can set the state
                    # to 'running' if it wasn't already.
                    if self.sync_state != 'running':
                        with session_scope() as db_session:
                            query = db_session.query(ImapAccount)
                            account = query.get(self.account_id)
                            self.sync_state = account.sync_state = 'running'
                    return self.client_cls(self.account_id, self.provider_info,
                                           self.email_address, conn,
                                           readonly=self.readonly)

                except ConnectionError, e:
                    if isinstance(e, TransientConnectionError):
                        return None
                    else:
                        logger.error('Error connecting',
                                     account_id=self.account_id)
                        with session_scope() as db_session:
                            query = db_session.query(ImapAccount)
                            account = query.get(self.account_id)
                            account.sync_state = 'connerror'
                            account.update_sync_error(str(e))
                        return None
                except ValidationError, e:
                    # If we failed to validate, but the account is oauth2, we
                    # may just need to refresh the access token. Try this one
                    # time.
                    if (self.provider_info['auth'] == 'oauth2' and
                            retry_count == 0):
                        with session_scope() as db_session:
                            query = db_session.query(ImapAccount)
                            account = query.get(self.account_id)
                            self.credential = token_manager.get_token(
                                account, force_refresh=True)
                    else:
                        logger.error('Error validating',
                                     account_id=self.account_id,
                                     logstash_tag='mark_invalid')
                        with session_scope() as db_session:
                            query = db_session.query(ImapAccount)
                            account = query.get(self.account_id)
                            account.mark_invalid()
                            account.update_sync_error(str(e))
                        raise
Example #9
0
def syncback_worker(semaphore, action, action_log_id, record_id, account_id,
                    syncback_service, retry_interval=30, extra_args=None):
    func = ACTION_FUNCTION_MAP[action]

    with semaphore:
        log = logger.new(record_id=record_id, action_log_id=action_log_id,
                         action=func, account_id=account_id,
                         extra_args=extra_args)
        # Not ignoring soft-deleted objects here because if you, say,
        # delete a draft, we still need to access the object to delete it
        # on the remote.
        try:
            with session_scope(ignore_soft_deletes=False) as db_session:
                if extra_args:
                    func(account_id, record_id, db_session, extra_args)
                else:
                    func(account_id, record_id, db_session)
                action_log_entry = db_session.query(ActionLog).get(
                    action_log_id)
                action_log_entry.status = 'successful'
                db_session.commit()
                latency = round((datetime.utcnow() -
                                 action_log_entry.created_at).
                                total_seconds(), 2)
                log.info('syncback action completed',
                         action_id=action_log_id,
                         latency=latency)
                syncback_service.remove_from_schedule(action_log_id)
        except Exception as e:
            # To reduce error-reporting noise, don't ship to Sentry
            # if not actionable.
            if isinstance(e, ProviderSpecificException):
                log.warning('Uncaught error', exc_info=True)
            else:
                log_uncaught_errors(log, account_id=account_id)

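            # Record the failed attempt; once the retry limit is reached, mark
            # the action as permanently failed.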
            with session_scope() as db_session:
                action_log_entry = db_session.query(ActionLog).get(
                    action_log_id)
                action_log_entry.retries += 1

                if action_log_entry.retries == ACTION_MAX_NR_OF_RETRIES:
                    log.critical('Max retries reached, giving up.',
                                 action_id=action_log_id,
                                 account_id=account_id, exc_info=True)
                    action_log_entry.status = 'failed'
                db_session.commit()

            # Wait for a bit before retrying
            gevent.sleep(retry_interval)

            # Remove the entry from the scheduled set so that it can be
            # retried or given up on.
            syncback_service.remove_from_schedule(action_log_id)

            # Again, don't raise on exceptions that require
            # provider-specific handling e.g. EAS
            if not isinstance(e, ProviderSpecificException):
                raise
Example #10
0
    def _run_impl(self):
        try:
            saved_folder_status = self._load_state()
        except IntegrityError:
            # The state insert failed because the folder ID ForeignKey
            # was no longer valid, ie. the folder for this engine was deleted
            # while we were starting up.
            # Exit the sync and let the monitor sort things out.
            log.info(
                "Folder state loading failed due to IntegrityError",
                folder_id=self.folder_id,
                account_id=self.account_id,
            )
            raise MailsyncDone()

        # NOTE: The parent ImapSyncMonitor handler could kill us at any
        # time if it receives a shutdown command. The shutdown command is
        # equivalent to ctrl-c.
        while True:
            old_state = self.state
            try:
                self.state = self.state_handlers[old_state]()
                self.heartbeat_status.publish(state=self.state)
            except UidInvalid:
                self.state = self.state + " uidinvalid"
                self.heartbeat_status.publish(state=self.state)
            except FolderMissingError:
                # Folder was deleted by monitor while its sync was running.
                # TODO: Monitor should handle shutting down the folder engine.
                log.info(
                    "Folder disappeared. Stopping sync.",
                    account_id=self.account_id,
                    folder_name=self.folder_name,
                    folder_id=self.folder_id,
                )
                raise MailsyncDone()
            except ValidationError as exc:
                log.error(
                    "Error authenticating; stopping sync",
                    exc_info=True,
                    account_id=self.account_id,
                    folder_id=self.folder_id,
                    logstash_tag="mark_invalid",
                )
                with session_scope(self.namespace_id) as db_session:
                    account = db_session.query(Account).get(self.account_id)
                    account.mark_invalid()
                    account.update_sync_error(str(exc))
                raise MailsyncDone()

            # State handlers are idempotent, so it's okay if we're
            # killed between the end of the handler and the commit.
            if self.state != old_state:
                # Don't need to re-query, will auto refresh on re-associate.
                with session_scope(self.namespace_id) as db_session:
                    db_session.add(saved_folder_status)
                    saved_folder_status.state = self.state
                    db_session.commit()
Example #11
0
def imap_initial_sync(crispin_client, log, folder_name, shared_state,
                      local_uids, uid_download_stack, msg_create_fn,
                      spawn_flags_refresh_poller=True):
    # We wrap the block in a try/finally because greenlets like
    # new_uid_poller need to be killed when this greenlet is interrupted.
    new_uid_poller = None
    flags_refresh_poller = None
    try:
        assert crispin_client.selected_folder_name == folder_name

        remote_uids = crispin_client.all_uids()
        log.info(remote_uid_count=len(remote_uids))
        log.info(local_uid_count=len(local_uids))

        with shared_state['syncmanager_lock']:
            log.debug("imap_initial_sync acquired syncmanager_lock")
            with session_scope(ignore_soft_deletes=False) as db_session:
                deleted_uids = remove_deleted_uids(
                    crispin_client.account_id, db_session, log, folder_name,
                    local_uids, remote_uids)

        local_uids = set(local_uids) - deleted_uids

        new_uids = set(remote_uids) - local_uids
        add_uids_to_stack(new_uids, uid_download_stack)

        with session_scope(ignore_soft_deletes=False) as db_session:
            update_uid_counts(db_session, log, crispin_client.account_id,
                              folder_name, remote_uid_count=len(remote_uids),
                              # This is the initial size of our download_queue
                              download_uid_count=len(new_uids),
                              # Flags are updated in imap_check_flags() and
                              # update_uid_count is set there
                              delete_uid_count=len(deleted_uids))

        new_uid_poller = spawn(check_new_uids, crispin_client.account_id,
                               folder_name, log,
                               uid_download_stack,
                               shared_state['poll_frequency'],
                               shared_state['syncmanager_lock'])

        if spawn_flags_refresh_poller:
            flags_refresh_poller = spawn(imap_check_flags,
                                         crispin_client.account_id,
                                         folder_name, log,
                                         shared_state['poll_frequency'],
                                         shared_state['syncmanager_lock'],
                                         shared_state['refresh_flags_max'])

        download_queued_uids(crispin_client, log, folder_name,
                             uid_download_stack,
                             len(local_uids), len(remote_uids),
                             shared_state['syncmanager_lock'],
                             download_and_commit_uids, msg_create_fn)

    finally:
        # The pollers may never have been spawned if we failed early.
        if new_uid_poller is not None:
            new_uid_poller.kill()

        if flags_refresh_poller is not None:
            flags_refresh_poller.kill()
Example #12
0
    def sync(self):
        """Query a remote provider for updates and persist them to the
        database. This function runs every `self.poll_frequency`.

        """
        self.log.info('syncing contacts')
        # Grab timestamp so next sync gets deltas from now
        sync_timestamp = datetime.utcnow()

        with session_scope() as db_session:
            account = db_session.query(Account).get(self.account_id)
            last_sync_dt = account.last_synced_contacts

            all_contacts = self.provider.get_items(sync_from_dt=last_sync_dt)

            # Do a batch insertion of every 100 contact objects
            change_counter = Counter()
            for new_contact in all_contacts:
                new_contact.namespace = account.namespace
                assert new_contact.uid is not None, \
                    'Got remote item with null uid'
                assert isinstance(new_contact.uid, basestring)

                try:
                    existing_contact = db_session.query(Contact).filter(
                        Contact.namespace == account.namespace,
                        Contact.provider_name == self.provider.PROVIDER_NAME,
                        Contact.uid == new_contact.uid).one()

                    # If the remote item was deleted, purge the corresponding
                    # database entries.
                    if new_contact.deleted:
                        db_session.delete(existing_contact)
                        change_counter['deleted'] += 1
                    else:
                        # Update fields in our old item with the new.
                        # Don't save the newly returned item to the database.
                        existing_contact.merge_from(new_contact)
                        change_counter['updated'] += 1

                except NoResultFound:
                    # We didn't know about this before! Add this item.
                    db_session.add(new_contact)
                    change_counter['added'] += 1

                # Flush every 100 objects for perf
                if sum(change_counter.values()) % 100 == 0:
                    db_session.flush()

        # Update last sync
        with session_scope() as db_session:
            account = db_session.query(Account).get(self.account_id)
            account.last_synced_contacts = sync_timestamp

        self.log.info('synced contacts', added=change_counter['added'],
                      updated=change_counter['updated'],
                      deleted=change_counter['deleted'])
Example #13
0
def highestmodseq_update(crispin_client, log, folder_name, last_highestmodseq,
                         highestmodseq_fn, syncmanager_lock):
    account_id = crispin_client.account_id
    new_highestmodseq = crispin_client.selected_highestmodseq
    new_uidvalidity = crispin_client.selected_uidvalidity
    log.info('starting highestmodseq update',
             current_highestmodseq=new_highestmodseq)
    changed_uids = crispin_client.new_and_updated_uids(last_highestmodseq)
    remote_uids = crispin_client.all_uids()

    local_uids = None
    if changed_uids:
        with session_scope(ignore_soft_deletes=False) as db_session:
            local_uids = account.all_uids(account_id, db_session, folder_name)

        new, updated = new_or_updated(changed_uids, local_uids)
        log.info(new_uid_count=len(new), updated_uid_count=len(updated))

        local_uids += new
        with syncmanager_lock:
            log.debug("highestmodseq_update acquired syncmanager_lock")
            with session_scope(ignore_soft_deletes=False) as db_session:
                deleted_uids = remove_deleted_uids(account_id, db_session, log,
                                                   folder_name, local_uids,
                                                   remote_uids)

        local_uids = set(local_uids) - deleted_uids
        update_metadata(crispin_client, log, folder_name, updated,
                        syncmanager_lock)

        with session_scope(ignore_soft_deletes=False) as db_session:
            update_uid_counts(db_session, log, account_id, folder_name,
                              remote_uid_count=len(remote_uids),
                              download_uid_count=len(new),
                              update_uid_count=len(updated),
                              delete_uid_count=len(deleted_uids))

        highestmodseq_fn(crispin_client, log, folder_name, new,
                         updated, syncmanager_lock)
    else:
        log.info("No new or updated messages")

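    # Whether or not anything changed, prune UIDs that were expunged on the
    # remote, refresh the UID counts and record the new UIDVALIDITY and
    # HIGHESTMODSEQ for the folder.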
    with session_scope(ignore_soft_deletes=False) as db_session:
        with syncmanager_lock:
            log.debug("highestmodseq_update acquired syncmanager_lock")
            if local_uids is None:
                local_uids = account.all_uids(
                    account_id, db_session, folder_name)
            deleted_uids = remove_deleted_uids(crispin_client.account_id,
                                               db_session, log, folder_name,
                                               local_uids, remote_uids)
        update_uid_counts(db_session, log, account_id, folder_name,
                          remote_uid_count=len(remote_uids),
                          delete_uid_count=len(deleted_uids))
        account.update_folder_info(account_id, db_session, folder_name,
                                   new_uidvalidity, new_highestmodseq)
        db_session.commit()
Example #14
0
    def execute_with_lock(self):
        self.log = logger.new(
            record_ids=self.record_ids, action_log_ids=self.action_log_ids,
            action=self.action_name, account_id=self.account_id,
            extra_args=self.extra_args)

        # Double-check that the action is still pending.
        # Although the task queue is populated based on pending actions, it's
        # possible that the processing of one action involved marking other
        # actions as failed.
        records_to_process, action_ids_to_process = self._get_records_and_actions_to_process()
        if len(action_ids_to_process) == 0:
            return

        for attempt in range(ACTION_MAX_NR_OF_RETRIES):
            self.log.debug("executing action", attempt=attempt)
            try:
                before, after = self._execute_timed_action(records_to_process)

                with session_scope(self.account_id) as db_session:
                    action_log_entries = db_session.query(ActionLog). \
                        filter(ActionLog.id.in_(action_ids_to_process))

                    for action_log_entry in action_log_entries:
                        self._mark_action_as_successful(action_log_entry, before, after, db_session)
                    return
            except Exception:
                log_uncaught_errors(self.log, account_id=self.account_id,
                                    provider=self.provider)
                with session_scope(self.account_id) as db_session:
                    action_log_entries = db_session.query(ActionLog). \
                        filter(ActionLog.id.in_(action_ids_to_process))

                    marked_as_failed = False
                    for action_log_entry in action_log_entries:
                        action_log_entry.retries += 1
                        if action_log_entry.retries == ACTION_MAX_NR_OF_RETRIES:
                            marked_as_failed = True
                            self._mark_action_as_failed(action_log_entry, db_session)
                        # If we've merged SyncbackTasks then their corresponding
                        # actions should all fail at the same time.
                        assert (not marked_as_failed or
                                action_log_entry.retries == ACTION_MAX_NR_OF_RETRIES)
                        db_session.commit()
                    if marked_as_failed:
                        return

            # Wait before retrying
            self.log.info("Syncback task retrying action after sleeping",
                          duration=self.retry_interval)

            # TODO(T6974): We might want to do some kind of exponential
            # backoff with jitter to avoid the thundering herd problem if a
            # provider suddenly starts having issues for a short period of
            # time.
            gevent.sleep(self.retry_interval)
Example #15
0
    def initial_sync_impl(self, crispin_client):
        # We wrap the block in a try/finally because the greenlets like
        # change_poller need to be killed when this greenlet is interrupted
        change_poller = None
        try:
            remote_uids = sorted(crispin_client.all_uids(), key=int)
            with self.syncmanager_lock:
                with session_scope(self.namespace_id) as db_session:
                    local_uids = common.local_uids(self.account_id, db_session,
                                                   self.folder_id)
                common.remove_deleted_uids(
                    self.account_id, self.folder_id,
                    set(local_uids) - set(remote_uids))
                unknown_uids = set(remote_uids) - local_uids
                with session_scope(self.namespace_id) as db_session:
                    self.update_uid_counts(
                        db_session, remote_uid_count=len(remote_uids),
                        download_uid_count=len(unknown_uids))

            change_poller = spawn(self.poll_for_changes)
            bind_context(change_poller, 'changepoller', self.account_id,
                         self.folder_id)

            if self.is_all_mail(crispin_client):
                # Prioritize UIDs for messages in the inbox folder.
                if len(remote_uids) < 1e6:
                    inbox_uids = set(
                        crispin_client.search_uids(['X-GM-LABELS', 'inbox']))
                else:
                    # The search above is really slow (times out) on really
                    # large mailboxes, so bound the search to messages within
                    # the past month in order to get anywhere.
                    since = datetime.utcnow() - timedelta(days=30)
                    inbox_uids = set(crispin_client.search_uids([
                        'X-GM-LABELS', 'inbox',
                        'SINCE', since]))

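                # Non-inbox UIDs go first here; the list is consumed in
                # reverse below, so inbox messages are downloaded first.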
                uids_to_download = (sorted(unknown_uids - inbox_uids) +
                                    sorted(unknown_uids & inbox_uids))
            else:
                uids_to_download = sorted(unknown_uids)

            for uids in chunk(reversed(uids_to_download), 1024):
                g_metadata = crispin_client.g_metadata(uids)
                # UIDs might have been expunged since sync started, in which
                # case the g_metadata call above will return nothing.
                # They may also have been preemptively downloaded by thread
                # expansion. We can omit such UIDs.
                uids = [u for u in uids if u in g_metadata and u not in
                        self.saved_uids]
                self.batch_download_uids(crispin_client, uids, g_metadata)
        finally:
            if change_poller is not None:
                # schedule change_poller to die
                kill(change_poller)
Example #16
0
    def _new_connection(self):
        from inbox.auth import handler_from_provider

        # Ensure that connections are initialized serially, so as not to use
        # many db sessions on startup.
        with self._new_conn_lock:
            auth_handler = handler_from_provider(self.provider_name)

            for retry_count in range(MAX_TRANSIENT_ERRORS):
                try:
                    conn = auth_handler.connect_account(self.provider_name,
                                                        self.email_address,
                                                        self.credential)

                    # If we can connect the account, then we can set the state
                    # to 'running' if it wasn't already.
                    if self.sync_state != 'running':
                        with session_scope() as db_session:
                            query = db_session.query(ImapAccount)
                            account = query.get(self.account_id)
                            self.sync_state = account.sync_state = 'running'

                    return new_crispin(self.account_id, self.email_address,
                                       self.provider_name, conn, self.readonly)

                except ConnectionError, e:
                    if isinstance(e, TransientConnectionError):
                        return None
                    else:
                        logger.error('Error connecting',
                                     account_id=self.account_id)
                        with session_scope() as db_session:
                            query = db_session.query(ImapAccount)
                            account = query.get(self.account_id)
                            account.sync_state = 'connerror'
                        return None
                except ValidationError, e:
                    # If we failed to validate, but the account is oauth2, we
                    # may just need to refresh the access token. Try this one
                    # time.
                    if (self.provider_info['auth'] == 'oauth2' and
                            retry_count == 0):
                        with session_scope() as db_session:
                            query = db_session.query(ImapAccount)
                            account = query.get(self.account_id)
                            self.credential = account.renew_access_token()
                    else:
                        logger.error('Error validating',
                                     account_id=self.account_id)
                        with session_scope() as db_session:
                            query = db_session.query(ImapAccount)
                            account = query.get(self.account_id)
                            account.sync_state = 'invalid'
                        raise
Example #17
0
    def _run_impl(self):
        old_state = self.state
        try:
            self.state = self.state_handlers[old_state]()
            self.heartbeat_status.publish(state=self.state)
        except UidInvalid:
            self.state = self.state + ' uidinvalid'
            self.uidinvalid_count += 1
            self.heartbeat_status.publish(state=self.state)

            # Check that we're not stuck in an endless uidinvalidity resync loop.
            if self.uidinvalid_count > MAX_UIDINVALID_RESYNCS:
                log.error('Resynced more than MAX_UIDINVALID_RESYNCS in a'
                          ' row. Stopping sync.')

                with session_scope(self.namespace_id) as db_session:
                    account = db_session.query(Account).get(self.account_id)
                    account.disable_sync('Detected endless uidvalidity '
                                         'resync loop')
                    account.sync_state = 'stopped'
                    db_session.commit()

                raise MailsyncDone()

        except FolderMissingError:
            # Folder was deleted by monitor while its sync was running.
            # TODO: Monitor should handle shutting down the folder engine.
            log.info('Folder disappeared. Stopping sync.',
                     account_id=self.account_id,
                     folder_name=self.folder_name,
                     folder_id=self.folder_id)
            raise MailsyncDone()
        except ValidationError as exc:
            log.error('Error authenticating; stopping sync', exc_info=True,
                      account_id=self.account_id, folder_id=self.folder_id,
                      logstash_tag='mark_invalid')
            with session_scope(self.namespace_id) as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.mark_invalid()
                account.update_sync_error(str(exc))
            raise MailsyncDone()

        # State handlers are idempotent, so it's okay if we're
        # killed between the end of the handler and the commit.
        if self.state != old_state:
            def update(status):
                status.state = self.state
            self.update_folder_sync_status(update)

        if self.state == old_state and self.state in ['initial', 'poll']:
            # We've been through a normal state transition without raising any
            # error. It's safe to reset the uidinvalid counter.
            self.uidinvalid_count = 0
Example #18
0
    def refresh_flags_impl(self, crispin_client, max_uids):
        crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)
        with session_scope(self.namespace_id) as db_session:
            local_uids = common.local_uids(
                account_id=self.account_id, session=db_session, folder_id=self.folder_id, limit=max_uids
            )

        flags = crispin_client.flags(local_uids)
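        # Local UIDs missing from the FLAGS response have been expunged on the
        # remote.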
        expunged_uids = set(local_uids).difference(flags.keys())
        common.remove_deleted_uids(self.account_id, self.folder_id, expunged_uids)
        with session_scope(self.namespace_id) as db_session:
            common.update_metadata(self.account_id, self.folder_id, flags, db_session)
Example #19
0
def check_new_uids(account_id, provider, folder_name, log, uid_download_stack,
                   poll_frequency, syncmanager_lock):
    """ Check for new UIDs and add them to the download stack.

    We do this by comparing local UID lists to remote UID lists, maintaining
    the invariant that (stack uids)+(local uids) == (remote uids).

    We also remove local messages that have disappeared from the remote, since
    it's entirely likely that users will be archiving mail as the initial
    sync goes on.

    We grab a new IMAP connection from the pool for this to isolate its
    actions from whatever the main greenlet may be doing.

    Runs until killed. (Intended to be run in a greenlet.)
    """
    log.info("Spinning up new UID-check poller for {}".format(folder_name))
    with connection_pool(account_id).get() as crispin_client:
        with session_scope() as db_session:
            crispin_client.select_folder(folder_name,
                                         uidvalidity_cb(
                                             db_session,
                                             crispin_client.account_id))
        while True:
            remote_uids = set(crispin_client.all_uids())
            # We lock this section to make sure no messages are being
            # created while we make sure the queue is in a good state.
            with syncmanager_lock:
                log.debug("check_new_uids acquired syncmanager_lock")
                with session_scope(ignore_soft_deletes=False) as db_session:
                    local_uids = set(account.all_uids(account_id, db_session,
                                                      folder_name))
                    stack_uids = set(uid_download_stack.queue)
                    local_with_pending_uids = local_uids | stack_uids
                    deleted_uids = remove_deleted_uids(
                        account_id, db_session, log, folder_name, local_uids,
                        remote_uids)
                    log.info("Removed {} deleted UIDs from {}".format(
                        len(deleted_uids), folder_name))

                # filter out messages that have disappeared on the remote side
                new_uid_download_stack = {u for u in uid_download_stack.queue
                                          if u in remote_uids}

                # add in any new uids from the remote
                for uid in remote_uids:
                    if uid not in local_with_pending_uids:
                        log.debug("adding new message {} to download queue"
                                  .format(uid))
                        new_uid_download_stack.add(uid)
                uid_download_stack.queue = sorted(new_uid_download_stack,
                                                  key=int)
            sleep(poll_frequency)
Example #20
0
    def initial_sync_impl(self, crispin_client):
        # We wrap the block in a try/finally because the change_poller greenlet
        # needs to be killed when this greenlet is interrupted
        change_poller = None
        try:
            assert crispin_client.selected_folder_name == self.folder_name
            remote_uids = crispin_client.all_uids()
            with self.syncmanager_lock:
                with session_scope(self.namespace_id) as db_session:
                    local_uids = common.local_uids(self.account_id, db_session,
                                                   self.folder_id)
                    common.remove_deleted_uids(
                        self.account_id, self.folder_id,
                        set(local_uids).difference(remote_uids),
                        db_session)

            new_uids = set(remote_uids).difference(local_uids)
            with session_scope(self.namespace_id) as db_session:
                account = db_session.query(Account).get(self.account_id)
                throttled = account.throttled
                self.update_uid_counts(
                    db_session,
                    remote_uid_count=len(remote_uids),
                    # This is the initial size of our download_queue
                    download_uid_count=len(new_uids))

            change_poller = spawn(self.poll_for_changes)
            bind_context(change_poller, 'changepoller', self.account_id,
                         self.folder_id)
            uids = sorted(new_uids, reverse=True)
            count = 0
            for uid in uids:
                # The speedup from batching appears to be less clear for
                # non-Gmail accounts, so for now just download one-at-a-time.
                self.download_and_commit_uids(crispin_client, [uid])
                self.heartbeat_status.publish()
                count += 1
                if throttled and count >= THROTTLE_COUNT:
                    # Throttled accounts' folders sync at a rate of
                    # 1 message/minute, after the first approx. THROTTLE_COUNT
                    # messages per folder are synced.
                    # Note this is an approx. limit since we use the #(uids),
                    # not the #(messages).
                    sleep(THROTTLE_WAIT)
        finally:
            if change_poller is not None:
                # schedule change_poller to die
                kill(change_poller)
Example #21
0
    def condstore_refresh_flags(self, crispin_client):
        new_highestmodseq = crispin_client.conn.folder_status(
            self.folder_name, ['HIGHESTMODSEQ'])['HIGHESTMODSEQ']
        # Ensure that we have an initial highestmodseq value stored before we
        # begin polling for changes.
        if self.highestmodseq is None:
            self.highestmodseq = new_highestmodseq

        if new_highestmodseq == self.highestmodseq:
            # Don't need to do anything if the highestmodseq hasn't
            # changed.
            return
        elif new_highestmodseq < self.highestmodseq:
            # This should really never happen, but if it does, handle it.
            log.warning('got server highestmodseq less than saved '
                        'highestmodseq',
                        new_highestmodseq=new_highestmodseq,
                        saved_highestmodseq=self.highestmodseq)
            return

        # Highestmodseq has changed, update accordingly.
        crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)
        changed_flags = crispin_client.condstore_changed_flags(
            self.highestmodseq)
        remote_uids = crispin_client.all_uids()
        with session_scope(self.namespace_id) as db_session:
            common.update_metadata(self.account_id, self.folder_id,
                                   changed_flags, db_session)
            local_uids = common.local_uids(self.account_id, db_session,
                                           self.folder_id)
            expunged_uids = set(local_uids).difference(remote_uids)

        if expunged_uids:
            # If new UIDs have appeared since we last checked in
            # get_new_uids, save them first. We want to always have the
            # latest UIDs before expunging anything, in order to properly
            # capture draft revisions.
            with session_scope(self.namespace_id) as db_session:
                lastseenuid = common.lastseenuid(self.account_id, db_session,
                                                 self.folder_id)
            if remote_uids and lastseenuid < max(remote_uids):
                log.info('Downloading new UIDs before expunging')
                self.get_new_uids(crispin_client)
            with session_scope(self.namespace_id) as db_session:
                common.remove_deleted_uids(self.account_id, self.folder_id,
                                           expunged_uids, db_session)
                db_session.commit()
        self.highestmodseq = new_highestmodseq
Example #22
0
    def download_and_commit_uids(self, crispin_client, uids):
        start = datetime.utcnow()
        raw_messages = crispin_client.uids(uids)
        if not raw_messages:
            return
        new_uids = set()
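        # Hold the syncmanager lock so that message creation here can't race
        # with UID deletion elsewhere in the sync.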
        with self.syncmanager_lock:
            with session_scope() as db_session:
                account = Account.get(self.account_id, db_session)
                folder = Folder.get(self.folder_id, db_session)
                raw_messages = self.__deduplicate_message_object_creation(
                    db_session, raw_messages, account)
                if not raw_messages:
                    return 0

                for msg in raw_messages:
                    uid = self.create_message(db_session, account, folder,
                                              msg)
                    if uid is not None:
                        db_session.add(uid)
                        db_session.commit()
                        new_uids.add(uid)

        log.info('Committed new UIDs',
                 new_committed_message_count=len(new_uids))
        # If we downloaded uids, record message velocity (#uid / latency)
        if self.state == "initial" and len(new_uids):
            self._report_message_velocity(datetime.utcnow() - start,
                                          len(new_uids))

        if self.is_first_message:
            self._report_first_message()
            self.is_first_message = False

        self.saved_uids.update(new_uids)
Example #23
0
    def resync_uids_impl(self):
        # First, let's check whether the UIDVALIDITY change was spurious; if
        # it was, just discard it and go on.
        with self.conn_pool.get() as crispin_client:
            crispin_client.select_folder(self.folder_name, lambda *args: True)
            remote_uidvalidity = crispin_client.selected_uidvalidity
            remote_uidnext = crispin_client.selected_uidnext
            if remote_uidvalidity <= self.uidvalidity:
                log.debug('UIDVALIDITY unchanged')
                return
        # Otherwise, if the UIDVALIDITY really has changed, discard all saved
        # UIDs for the folder, mark associated messages for garbage-collection
        # and return to the 'initial' state to resync.
        # This will cause messages and threads to be deleted and recreated,
        # but uidinvalidity is sufficiently rare that this tradeoff is
        # acceptable.
        with session_scope(self.namespace_id) as db_session:
            invalid_uids = {
                uid for uid, in db_session.query(ImapUid.msg_uid).
                filter_by(account_id=self.account_id,
                          folder_id=self.folder_id)
            }
            common.remove_deleted_uids(self.account_id, self.folder_id,
                                       invalid_uids, db_session)
        self.uidvalidity = remote_uidvalidity
        self.highestmodseq = None
        self.uidnext = remote_uidnext
Example #24
0
    def get_new_uids(self, crispin_client):
        try:
            remote_uidnext = crispin_client.conn.folder_status(
                self.folder_name, ['UIDNEXT']).get('UIDNEXT')
        except ValueError:
            # Work around issue where ValueError is raised on parsing STATUS
            # response.
            log.warning('Error getting UIDNEXT', exc_info=True)
            remote_uidnext = None
        except imaplib.IMAP4.error as e:
            if '[NONEXISTENT]' in e.message:
                raise FolderMissingError()
            else:
                raise e
        if remote_uidnext is not None and remote_uidnext == self.uidnext:
            return
        log.info('UIDNEXT changed, checking for new UIDs',
                 remote_uidnext=remote_uidnext, saved_uidnext=self.uidnext)

        crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)
        with session_scope(self.namespace_id) as db_session:
            lastseenuid = common.lastseenuid(self.account_id, db_session,
                                             self.folder_id)
        latest_uids = crispin_client.conn.fetch('{}:*'.format(lastseenuid + 1),
                                                ['UID']).keys()
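        # A 'N:*' UID fetch always returns at least the message with the
        # highest UID in the folder, even when nothing new has arrived, so
        # filter lastseenuid back out.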
        new_uids = set(latest_uids) - {lastseenuid}
        if new_uids:
            for uid in sorted(new_uids):
                self.download_and_commit_uids(crispin_client, [uid])
        self.uidnext = remote_uidnext
Example #25
0
    def initial_sync(self):
        log.bind(state='initial')
        log.info('starting initial sync')

        if self.is_first_sync:
            self._report_initial_sync_start()
            self.is_first_sync = False

        with self.conn_pool.get() as crispin_client:
            crispin_client.select_folder(self.folder_name, uidvalidity_cb)
            # Ensure we have an ImapFolderInfo row created prior to sync start.
            with session_scope(self.namespace_id) as db_session:
                try:
                    db_session.query(ImapFolderInfo). \
                        filter(ImapFolderInfo.account_id == self.account_id,
                               ImapFolderInfo.folder_id == self.folder_id). \
                        one()
                except NoResultFound:
                    imapfolderinfo = ImapFolderInfo(
                        account_id=self.account_id, folder_id=self.folder_id,
                        uidvalidity=crispin_client.selected_uidvalidity,
                        uidnext=crispin_client.selected_uidnext)
                    db_session.add(imapfolderinfo)
                db_session.commit()

            self.initial_sync_impl(crispin_client)

        if self.is_initial_sync:
            self._report_initial_sync_end()
            self.is_initial_sync = False

        return 'poll'
Example #26
0
def upgrade():
    from inbox.ignition import main_engine
    engine = main_engine(pool_size=1, max_overflow=0)
    from inbox.models.session import session_scope
    from sqlalchemy.ext.declarative import declarative_base
    from sqlalchemy.orm.exc import NoResultFound

    # Reflect the schema first; Base must exist before we can check whether
    # the EAS tables are present in this deployment.
    Base = declarative_base()
    Base.metadata.reflect(engine)

    if 'easfoldersyncstatus' in Base.metadata.tables:
        from inbox.models.backends.eas import EASFolderSyncStatus
        from inbox.models import Folder
        from inbox.util.eas.constants import SKIP_FOLDERS

        with session_scope(versioned=False, ignore_soft_deletes=False) as \
                db_session:
            statuses = db_session.query(EASFolderSyncStatus).filter(
                EASFolderSyncStatus.eas_folder_type.in_(SKIP_FOLDERS)).all()
            for s in statuses:
                db_session.delete(s)
                db_session.delete(s.folder)

            try:
                for status in db_session.query(EASFolderSyncStatus)\
                        .join(Folder).filter(
                            Folder.name == 'RecipientInfo').all():
                    db_session.delete(status)
                    db_session.delete(status.folder)
            except NoResultFound:
                pass

            db_session.commit()
Example #27
0
    def _set_account_info(self):
        with session_scope() as db_session:
            account = db_session.query(Account).get(self.account_id)
            self.provider_name = account.provider
            self.email_address = account.email_address
            self.provider_info = provider_info(account.provider,
                                               account.email_address)
            self.sync_state = account.sync_state

            # Refresh token if need be, for OAuthed accounts
            if self.provider_info['auth'] == 'oauth2':
                try:
                    self.credential = account.access_token
                except ValidationError:
                    logger.error("Error obtaining access token",
                                 account_id=self.account_id)
                    account.sync_state = 'invalid'
                    db_session.commit()
                    raise
                except ConnectionError:
                    logger.error("Error connecting",
                                 account_id=self.account_id)
                    account.sync_state = 'connerror'
                    db_session.commit()
                    raise
            else:
                self.credential = account.password
Example #28
0
def new_crispin(account_id, email_address, provider_name, conn, readonly=True):
    if provider_name == 'gmail':
        cls = GmailCrispinClient
    else:
        info = provider_info(provider_name, email_address)
        # look up in the provider database to see
        # if the provider supports CONDSTORE
        if "condstore" in info:
            if info["condstore"]:
                cls = CondStoreCrispinClient
            else:
                # condstore=False in provider file
                cls = CrispinClient
        else:
            # no match in provider file, check in the
            # account settings.
            with session_scope() as db_session:
                acc = db_session.query(Account).get(account_id)
                if acc is not None and getattr(acc, 'supports_condstore',
                                               False):
                    cls = CondStoreCrispinClient
                else:
                    # Fall back to the plain client if the account is missing
                    # or doesn't advertise CONDSTORE support.
                    cls = CrispinClient
    return cls(account_id, provider_name, email_address, conn,
               readonly=readonly)
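The CONDSTORE decision above resolves in three steps: Gmail always gets its own client, then the provider file is consulted, and only then the per-account flag. A minimal stand-alone sketch of that resolution order (the helper function and sample data below are illustrative, not part of the codebase):

# Illustrative only: mirrors the resolution order in new_crispin() above.
def pick_client(provider_name, info, account_supports_condstore=False):
    if provider_name == 'gmail':
        return 'GmailCrispinClient'
    if 'condstore' in info:
        return ('CondStoreCrispinClient' if info['condstore']
                else 'CrispinClient')
    # No hint in the provider file: fall back to the per-account flag.
    return ('CondStoreCrispinClient' if account_supports_condstore
            else 'CrispinClient')

assert pick_client('gmail', {}) == 'GmailCrispinClient'
assert pick_client('custom', {'condstore': False}) == 'CrispinClient'
assert pick_client('custom', {},
                   account_supports_condstore=True) == 'CondStoreCrispinClient'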
Example #29
0
def report_progress(crispin_client, log, folder_name, downloaded_uid_count,
                    num_remaining_messages):
    """ Inform listeners of sync progress. """

    assert crispin_client.selected_folder_name == folder_name

    with session_scope(ignore_soft_deletes=False) as db_session:
        saved_status = db_session.query(ImapFolderSyncStatus).join(Folder)\
            .filter(
                ImapFolderSyncStatus.account_id == crispin_client.account_id,
                Folder.name == folder_name).one()

        previous_count = saved_status.metrics.get(
            'num_downloaded_since_timestamp', 0)

        metrics = dict(num_downloaded_since_timestamp=(previous_count +
                                                       downloaded_uid_count),
                       download_uid_count=num_remaining_messages,
                       queue_checked_at=datetime.utcnow())

        saved_status.update_metrics(metrics)

        db_session.commit()

    log.info('mailsync progress', folder=folder_name,
             msg_queue_count=num_remaining_messages)
Example #30
0
def remote_create_folder(crispin_client, account_id, category_id):
    with session_scope(account_id) as db_session:
        category = db_session.query(Category).get(category_id)
        if category is None:
            return
        display_name = category.display_name
    crispin_client.conn.create_folder(display_name)
Example #31
0
    def execute_with_lock(self):
        log = logger.new(record_id=self.record_id,
                         action_log_id=self.action_log_id,
                         action=self.action_name,
                         account_id=self.account_id,
                         extra_args=self.extra_args)

        for _ in range(ACTION_MAX_NR_OF_RETRIES):
            try:
                before_func = datetime.utcnow()
                func_args = [self.account_id, self.record_id]
                if self.extra_args:
                    func_args.append(self.extra_args)
                if self.uses_crispin_client():
                    assert self.crispin_client is not None
                    func_args.insert(0, self.crispin_client)
                self.func(*func_args)
                after_func = datetime.utcnow()

                with session_scope(self.account_id) as db_session:
                    action_log_entry = db_session.query(ActionLog).get(
                        self.action_log_id)
                    action_log_entry.status = 'successful'
                    db_session.commit()
                    latency = round(
                        (datetime.utcnow() -
                         action_log_entry.created_at).total_seconds(), 2)
                    func_latency = round(
                        (after_func - before_func).total_seconds(), 2)
                    log.info('syncback action completed',
                             action_id=self.action_log_id,
                             latency=latency,
                             process=self.parent_service().process_number,
                             func_latency=func_latency)
                    self._log_to_statsd(action_log_entry.status, latency)
                    return
            except Exception:
                log_uncaught_errors(log,
                                    account_id=self.account_id,
                                    provider=self.provider)
                with session_scope(self.account_id) as db_session:
                    action_log_entry = db_session.query(ActionLog).get(
                        self.action_log_id)
                    action_log_entry.retries += 1
                    if (action_log_entry.retries == ACTION_MAX_NR_OF_RETRIES):
                        log.critical('Max retries reached, giving up.',
                                     exc_info=True)
                        action_log_entry.status = 'failed'
                        self._log_to_statsd(action_log_entry.status)
                        db_session.commit()
                        return
                    db_session.commit()

            # Wait before retrying
            log.info("Syncback task retrying action after sleeping",
                     duration=self.retry_interval)

            # TODO(T6974): We might want to do some kind of exponential
            # backoff with jitter to avoid the thundering herd problem if a
            # provider suddenly starts having issues for a short period of
            # time.
            gevent.sleep(self.retry_interval)
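The TODO above suggests exponential backoff with jitter; a minimal sketch of what such a delay schedule could look like ("full jitter", with illustrative base and cap values, not the project's settings):

import random

# Illustrative "full jitter" backoff: sleep a random duration between 0 and
# min(cap, base * 2**attempt). The constants here are examples only.
def retry_delay(attempt, base=30, cap=600):
    return random.uniform(0, min(cap, base * (2 ** attempt)))

# A caller could then use gevent.sleep(retry_delay(attempt)) in place of the
# fixed gevent.sleep(self.retry_interval) above.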
Example #32
0
def upgrade():
    from sqlalchemy.ext.declarative import declarative_base

    from inbox.ignition import main_engine
    from inbox.models.session import session_scope

    engine = main_engine(pool_size=1, max_overflow=0)
    op.create_table(
        "genericaccount",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(["id"], [u"imapaccount.id"],
                                ondelete="CASCADE"),
        sa.Column("password_id", sa.Integer(), nullable=True),
        sa.Column("provider", sa.String(length=64), nullable=False),
        sa.PrimaryKeyConstraint("id"),
    )

    Base = declarative_base()
    Base.metadata.reflect(engine)

    class Account(Base):
        __table__ = Base.metadata.tables["account"]

    class ImapAccount(Base):
        __table__ = Base.metadata.tables["imapaccount"]

    class YahooAccount(Base):
        __table__ = Base.metadata.tables["yahooaccount"]

    class AOLAccount(Base):
        __table__ = Base.metadata.tables["aolaccount"]

    class GenericAccount(Base):
        __table__ = Base.metadata.tables["genericaccount"]

    class Secret(Base):
        __table__ = Base.metadata.tables["secret"]

    with session_scope(versioned=False) as db_session:
        for acct in db_session.query(YahooAccount):
            secret = Secret(
                acl_id=0,
                type=0,
                secret=acct.password,
                created_at=datetime.utcnow(),
                updated_at=datetime.utcnow(),
            )
            db_session.add(secret)
            db_session.commit()

            new_acct = GenericAccount(id=acct.id, provider="yahoo")

            new_acct.password_id = secret.id
            db_session.add(new_acct)

        for acct in db_session.query(AOLAccount):
            secret = Secret(
                acl_id=0,
                type=0,
                secret=acct.password,
                created_at=datetime.utcnow(),
                updated_at=datetime.utcnow(),
            )
            db_session.add(secret)
            db_session.commit()

            new_acct = GenericAccount(id=acct.id, provider="aol")

            new_acct.password_id = secret.id
            db_session.add(new_acct)

        db_session.commit()

    # don't cascade the delete
    engine.execute("drop table aolaccount")
    engine.execute("drop table yahooaccount")
    op.drop_column("imapaccount", "imap_host")
Example #33
0
    def _new_raw_connection(self):
        """Returns a new, authenticated IMAPClient instance for the account."""
        with session_scope() as db_session:
            account = db_session.query(Account).get(self.account_id)
            return self.auth_handler.connect_account(account)
Example #34
0
def upgrade():
    from inbox.models.session import session_scope
    from sqlalchemy.ext.declarative import declarative_base
    from inbox.ignition import main_engine
    engine = main_engine(pool_size=1, max_overflow=0)

    op.alter_column('calendar',
                    'notes',
                    new_column_name='description',
                    existing_type=sa.Text(),
                    existing_nullable=True)
    op.add_column(
        'calendar',
        sa.Column('provider_name', sa.String(length=64), nullable=False))

    op.alter_column('event',
                    'subject',
                    new_column_name='title',
                    existing_type=sa.String(1024),
                    existing_nullable=True)

    op.alter_column('event',
                    'body',
                    new_column_name='description',
                    existing_type=sa.Text(),
                    existing_nullable=True)

    # We're changing the structure of the calendar name so that the provider
    # can be split out of the name, which was previously overloaded. Nobody
    # should have any existing Inbox calendars yet, so we don't have to worry
    # about a user whose calendar name contains a dash ('-'). These calendars
    # are read_only since they come from a provider.
    #
    # Also, any already-synced events are read-only, as nobody has created
    # events yet.
    Base = declarative_base()
    Base.metadata.reflect(engine)

    class Calendar(Base):
        __table__ = Base.metadata.tables['calendar']

    class Event(Base):
        __table__ = Base.metadata.tables['event']

    with session_scope(versioned=False, ignore_soft_deletes=False) \
            as db_session:
        for calendar in db_session.query(Calendar):
            if calendar.name and '-' in calendar.name:
                provider_name, name = calendar.name.split('-')
                calendar.provider_name = provider_name
                calendar.name = name
                calendar.read_only = True
        for event in db_session.query(Event):
            event.read_only = True
        db_session.commit()

    op.drop_constraint('calendar_ibfk_1', 'calendar', type_='foreignkey')
    op.drop_constraint('uuid', 'calendar', type_='unique')

    op.create_unique_constraint('uuid', 'calendar',
                                ['name', 'provider_name', 'account_id'])

    op.create_foreign_key(None,
                          "calendar",
                          "account", ["account_id"], ["id"],
                          ondelete='CASCADE')

    op.drop_constraint('event_ibfk_2', 'event', type_='foreignkey')
    op.create_foreign_key('event_ibfk_2',
                          'event',
                          'calendar', ['calendar_id'], ['id'],
                          ondelete='CASCADE')
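For reference, a tiny illustration of the name split this migration relies on (the sample value is hypothetical):

# Hypothetical pre-migration calendar name, encoded as '<provider>-<name>'.
old_name = 'google-primary'
provider_name, name = old_name.split('-')
assert (provider_name, name) == ('google', 'primary')
# As the comment above notes, this assumes at most one dash; names with more
# dashes would need split('-', 1) instead.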
Example #35
0
    def condstore_refresh_flags(self, crispin_client):
        new_highestmodseq = crispin_client.conn.folder_status(
            self.folder_name, ['HIGHESTMODSEQ'])['HIGHESTMODSEQ']
        # Ensure that we have an initial highestmodseq value stored before we
        # begin polling for changes.
        if self.highestmodseq is None:
            self.highestmodseq = new_highestmodseq

        if new_highestmodseq == self.highestmodseq:
            # Don't need to do anything if the highestmodseq hasn't
            # changed.
            return
        elif new_highestmodseq < self.highestmodseq:
            # This should really never happen, but if it does, handle it.
            log.warning('got server highestmodseq less than saved '
                        'highestmodseq',
                        new_highestmodseq=new_highestmodseq,
                        saved_highestmodseq=self.highestmodseq)
            return

        log.info('HIGHESTMODSEQ has changed, getting changed UIDs',
                 new_highestmodseq=new_highestmodseq,
                 saved_highestmodseq=self.highestmodseq)
        crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)
        changed_flags = crispin_client.condstore_changed_flags(
            self.highestmodseq)
        remote_uids = crispin_client.all_uids()

        # In order to be able to sync changes to tens of thousands of flags at
        # once, we commit updates in batches. We do this in ascending order by
        # modseq and periodically "checkpoint" our saved highestmodseq. (It's
        # safe to checkpoint *because* we go in ascending order by modseq.)
        # That way if the process gets restarted halfway through this refresh,
        # we don't have to completely start over. It's also slow to load many
        # objects into the SQLAlchemy session and then issue lots of commits;
        # we avoid that by batching.
        flag_batches = chunk(
            sorted(changed_flags.items(), key=lambda (k, v): v.modseq),
            CONDSTORE_FLAGS_REFRESH_BATCH_SIZE)
        for flag_batch in flag_batches:
            with session_scope(self.namespace_id) as db_session:
                common.update_metadata(self.account_id, self.folder_id,
                                       self.folder_role, dict(flag_batch),
                                       db_session)
            if len(flag_batch) == CONDSTORE_FLAGS_REFRESH_BATCH_SIZE:
                interim_highestmodseq = max(v.modseq for k, v in flag_batch)
                self.highestmodseq = interim_highestmodseq

        with session_scope(self.namespace_id) as db_session:
            local_uids = common.local_uids(self.account_id, db_session,
                                           self.folder_id)
            expunged_uids = set(local_uids).difference(remote_uids)

        if expunged_uids:
            # If new UIDs have appeared since we last checked in
            # get_new_uids, save them first. We want to always have the
            # latest UIDs before expunging anything, in order to properly
            # capture draft revisions.
            with session_scope(self.namespace_id) as db_session:
                lastseenuid = common.lastseenuid(self.account_id, db_session,
                                                 self.folder_id)
            if remote_uids and lastseenuid < max(remote_uids):
                log.info('Downloading new UIDs before expunging')
                self.get_new_uids(crispin_client)
            common.remove_deleted_uids(self.account_id, self.folder_id,
                                       expunged_uids)
        self.highestmodseq = new_highestmodseq
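A small stand-alone sketch of the batch-and-checkpoint pattern above, with a simple chunk() stand-in (the real helper may differ):

# Stand-in for the chunk() helper: yield fixed-size slices of a sequence.
def chunk(items, size):
    for i in range(0, len(items), size):
        yield items[i:i + size]

# Checkpointing is safe only because batches are processed in ascending modseq
# order: once a full batch is committed, every modseq up to its maximum is done.
batch_size = 3
modseqs = [5, 7, 9, 12, 15, 20, 21]
checkpoint = None
for batch in chunk(modseqs, batch_size):
    # ... commit the flag updates for this batch ...
    if len(batch) == batch_size:
        checkpoint = max(batch)
assert checkpoint == 20   # the last *full* batch ended at modseq 20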
Example #36
0
File: crispin.py Project: wmv/inbox
    def folder_names(self):
        # Different providers use different names for folders. Here we keep a
        # default map for the common names; additional mappings can be
        # provided via the provider configuration file.
        default_folder_map = {
            'INBOX': 'inbox',
            'DRAFTS': 'drafts',
            'DRAFT': 'drafts',
            'JUNK': 'spam',
            'ARCHIVE': 'archive',
            'SENT': 'sent',
            'TRASH': 'trash',
            'SPAM': 'spam'
        }

        # Some providers also expose special-use flags that identify common
        # folders; we read those flags and apply the mapping below.
        flag_to_folder_map = {
            '\\Trash': 'trash',
            '\\Sent': 'sent',
            '\\Drafts': 'drafts',
            '\\Junk': 'spam',
            '\\Inbox': 'inbox',
            '\\Spam': 'spam'
        }

        # Additionally we provide a custom mapping for providers that
        # don't fit into the defaults.
        info = provider_info(self.provider_name)
        folder_map = info.get('folder_map', {})

        if self._folder_names is None:
            folders = self._fetch_folder_list()
            self._folder_names = dict()
            for flags, delimiter, name in folders:
                if u'\\Noselect' in flags:
                    # special folders that can't contain messages
                    pass
                # TODO: internationalization support
                elif name in folder_map:
                    self._folder_names[folder_map[name]] = name
                elif name.upper() in default_folder_map:
                    self._folder_names[default_folder_map[name.upper()]] = name
                else:
                    matched = False
                    for flag in flags:
                        if flag in flag_to_folder_map:
                            self._folder_names[flag_to_folder_map[flag]] = name
                            matched = True
                    if not matched:
                        self._folder_names.setdefault('extra',
                                                      list()).append(name)

        # TODO: support subfolders

        # Create any needed folders that don't exist on the backend
        needed_folders = set(
            ['inbox', 'drafts', 'sent', 'spam', 'trash', 'archive'])

        needed_folders -= set(self._folder_names.keys())

        for folder_id in needed_folders:
            name = folder_id.capitalize()
            self.create_folder(name)

            with session_scope() as db_session:
                account = db_session.query(Account).get(self.account_id)

                folder = Folder.find_or_create(db_session, account, name,
                                               folder_id)
                setattr(account, folder_id + '_folder', folder)
                db_session.commit()

            self._folder_names[folder_id] = name

        return self._folder_names
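A worked illustration of the mapping order above, using made-up maps and a made-up LIST response in the (flags, delimiter, name) shape the folder fetch returns:

# Hypothetical maps and LIST response, showing the resolution order:
# provider folder_map, then the default name map, then special-use flags,
# and finally the 'extra' bucket.
folder_map = {'Sent Items': 'sent'}                       # from provider config
default_folder_map = {'INBOX': 'inbox', 'TRASH': 'trash'}
flag_to_folder_map = {'\\Junk': 'spam'}

listing = [
    ((), '/', 'INBOX'),                 # default name map
    ((), '/', 'Sent Items'),            # provider folder_map
    (('\\Junk',), '/', 'Bulk Mail'),    # matched via special-use flag
    ((), '/', 'Receipts'),              # no match -> 'extra'
]

resolved = {}
for flags, _, name in listing:
    if name in folder_map:
        resolved[folder_map[name]] = name
    elif name.upper() in default_folder_map:
        resolved[default_folder_map[name.upper()]] = name
    elif any(f in flag_to_folder_map for f in flags):
        for f in flags:
            if f in flag_to_folder_map:
                resolved[flag_to_folder_map[f]] = name
    else:
        resolved.setdefault('extra', []).append(name)

assert resolved == {'inbox': 'INBOX', 'sent': 'Sent Items',
                    'spam': 'Bulk Mail', 'extra': ['Receipts']}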
Example #37
0
    def __init__(
        self,
        account_id,
        namespace_id,
        folder_name,
        email_address,
        provider_name,
        syncmanager_lock,
    ):

        with session_scope(namespace_id) as db_session:
            try:
                folder = (db_session.query(Folder).filter(
                    Folder.name == folder_name,
                    Folder.account_id == account_id).one())
            except NoResultFound:
                raise MailsyncError(
                    u"Missing Folder '{}' on account {}".format(
                        folder_name, account_id))

            self.folder_id = folder.id
            self.folder_role = folder.canonical_name
            # Metric flags for sync performance
            self.is_initial_sync = folder.initial_sync_end is None
            self.is_first_sync = folder.initial_sync_start is None
            self.is_first_message = self.is_first_sync

        bind_context(self, "foldersyncengine", account_id, self.folder_id)
        self.account_id = account_id
        self.namespace_id = namespace_id
        self.folder_name = folder_name
        self.email_address = email_address

        if self.folder_name.lower() == "inbox":
            self.poll_frequency = INBOX_POLL_FREQUENCY
        else:
            self.poll_frequency = DEFAULT_POLL_FREQUENCY
        self.syncmanager_lock = syncmanager_lock
        self.state = None
        self.provider_name = provider_name
        self.last_fast_refresh = None
        self.flags_fetch_results = {}
        self.conn_pool = connection_pool(self.account_id)
        self.polling_logged_at = 0

        self.state_handlers = {
            "initial": self.initial_sync,
            "initial uidinvalid": self.resync_uids,
            "poll": self.poll,
            "poll uidinvalid": self.resync_uids,
            "finish": lambda: "finish",
        }

        self.setup_heartbeats()
        Greenlet.__init__(self)

        # Some generic IMAP servers are throwing UIDVALIDITY
        # errors forever. Instead of resyncing those servers
        # ad vitam, we keep track of the number of consecutive
        # times we got such an error and bail out if it's higher than
        # MAX_UIDINVALID_RESYNCS.
        self.uidinvalid_count = 0
Example #38
0
    def throttled(self):
        with session_scope(self.namespace_id) as db_session:
            account = db_session.query(Account).get(self.account_id)
            throttled = account.throttled

        return throttled
Example #39
0
def upgrade():
    from sqlalchemy.ext.declarative import declarative_base

    from inbox.ignition import main_engine
    from inbox.models.session import session_scope

    engine = main_engine(pool_size=1, max_overflow=0)
    Base = declarative_base()
    Base.metadata.reflect(engine)
    # ADD:
    op.add_column(
        "imapaccount", sa.Column("family_name", sa.String(length=255), nullable=True)
    )
    op.add_column(
        "imapaccount", sa.Column("g_gender", sa.String(length=16), nullable=True)
    )
    op.add_column(
        "imapaccount", sa.Column("g_locale", sa.String(length=16), nullable=True)
    )
    op.add_column(
        "imapaccount", sa.Column("g_picture_url", sa.String(length=255), nullable=True)
    )
    op.add_column(
        "imapaccount", sa.Column("g_plus_url", sa.String(length=255), nullable=True)
    )
    op.add_column(
        "imapaccount", sa.Column("given_name", sa.String(length=255), nullable=True)
    )
    op.add_column(
        "imapaccount", sa.Column("google_id", sa.String(length=255), nullable=True)
    )

    # MOVE:
    class Account_(Base):
        __table__ = Base.metadata.tables["account"]

    with session_scope() as db_session:
        results = db_session.query(
            Account_.id,
            Account_.family_name,
            Account_.google_id,
            Account_.g_plus_url,
            Account_.g_picture_url,
            Account_.g_gender,
            Account_.given_name,
            Account_.g_locale,
        ).all()

    imapaccount = table(
        "imapaccount",
        column("id", sa.String),
        column("family_name", sa.String),
        column("google_id", sa.String),
        column("g_plus_url", sa.String),
        column("g_picture_url", sa.String),
        column("g_gender", sa.String),
        column("given_name", sa.String),
        column("g_locale", sa.String),
    )

    for r in results:
        op.execute(
            imapaccount.update()
            .where(imapaccount.c.id == r[0])
            .values(
                {
                    "family_name": r[1],
                    "google_id": r[2],
                    "g_plus_url": r[3],
                    "g_picture_url": r[4],
                    "g_gender": r[5],
                    "given_name": r[6],
                    "g_locale": r[7],
                }
            )
        )

    # DROP:
    op.drop_column("account", "family_name")
    op.drop_column("account", "google_id")
    op.drop_column("account", "g_plus_url")
    op.drop_column("account", "g_picture_url")
    op.drop_column("account", "g_gender")
    op.drop_column("account", "given_name")
    op.drop_column("account", "g_locale")
Example #40
0
def _batch_delete(engine,
                  table,
                  column_id_filters,
                  account_id,
                  throttle=False,
                  dry_run=False):
    (column, id_) = column_id_filters
    count = engine.execute("SELECT COUNT(*) FROM {} WHERE {}={};".format(
        table, column, id_)).scalar()

    if count == 0:
        log.info("Completed batch deletion", table=table)
        return

    batches = int(math.ceil(float(count) / CHUNK_SIZE))

    log.info("Starting batch deletion",
             table=table,
             count=count,
             batches=batches)
    start = time.time()

    if table in ("message", "block"):
        query = ""
    else:
        query = "DELETE FROM {} WHERE {}={} LIMIT {};".format(
            table, column, id_, CHUNK_SIZE)

    log.info("deleting", account_id=account_id, table=table)

    for _ in range(0, batches):
        if throttle:
            bulk_throttle()

        if table == "block":
            with session_scope(account_id) as db_session:
                blocks = list(
                    db_session.query(Block.id, Block.data_sha256).filter(
                        Block.namespace_id == id_).limit(CHUNK_SIZE))
            block_ids = [b[0] for b in blocks]
            block_hashes = [b[1] for b in blocks]

            # XXX: We currently don't check for existing blocks.
            if dry_run is False:
                delete_from_blockstore(*block_hashes)

            with session_scope(account_id) as db_session:
                query = db_session.query(Block).filter(Block.id.in_(block_ids))
                if dry_run is False:
                    query.delete(synchronize_session=False)

        elif table == "message":
            with session_scope(account_id) as db_session:
                # Messages must be ordered by `received_date` (part of the
                # index hinted below); otherwise MySQL will raise an error
                # when deleting from the message table.
                messages = list(
                    db_session.query(Message.id, Message.data_sha256).filter(
                        Message.namespace_id == id_).order_by(
                            desc(Message.received_date)).limit(CHUNK_SIZE).
                    with_hint(
                        Message,
                        "use index (ix_message_namespace_id_received_date)"))

            message_ids = [m[0] for m in messages]
            message_hashes = [m[1] for m in messages]

            with session_scope(account_id) as db_session:
                existing_hashes = list(
                    db_session.query(Message.data_sha256).filter(
                        Message.data_sha256.in_(message_hashes)).filter(
                            Message.namespace_id != id_).distinct())
            existing_hashes = [h[0] for h in existing_hashes]

            remove_hashes = set(message_hashes) - set(existing_hashes)
            if dry_run is False:
                delete_from_blockstore(*list(remove_hashes))

            with session_scope(account_id) as db_session:
                query = db_session.query(Message).filter(
                    Message.id.in_(message_ids))
                if dry_run is False:
                    query.delete(synchronize_session=False)

        else:
            if dry_run is False:
                engine.execute(query)
            else:
                log.debug(query)

    end = time.time()
    log.info("Completed batch deletion", time=end - start, table=table)

    count = engine.execute("SELECT COUNT(*) FROM {} WHERE {}={};".format(
        table, column, id_)).scalar()

    if dry_run is False:
        assert count == 0
Example #41
0
def delete_namespace(namespace_id, throttle=False, dry_run=False):
    """
    Delete all the data associated with a namespace from the database.
    USE WITH CAUTION.

    NOTE: This function is only called from bin/delete-account-data.
    It prints to stdout.

    Raises AccountDeletionErrror with message if there are problems
    """

    with session_scope(namespace_id) as db_session:
        try:
            account = (db_session.query(Account).join(Namespace).filter(
                Namespace.id == namespace_id).one())
        except NoResultFound:
            raise AccountDeletionErrror("Could not find account in database")

        if not account.is_marked_for_deletion:
            raise AccountDeletionErrror(
                "Account is_marked_for_deletion is False. "
                "Change this to proceed with deletion.")
        account_id = account.id
        account_discriminator = account.discriminator

    log.info("Deleting account", account_id=account_id)
    start_time = time.time()

    # These filters configure batch deletion in chunks for specific tables
    # that are prone to transaction blocking during large concurrent write
    # volume. See _batch_delete.
    # NOTE: ImapFolderInfo doesn't really fall into this category, but we
    # include it here for simplicity anyway.

    filters = OrderedDict()
    for table in [
            "message",
            "block",
            "thread",
            "transaction",
            "actionlog",
            "event",
            "contact",
            "dataprocessingcache",
    ]:
        filters[table] = ("namespace_id", namespace_id)

    if account_discriminator == "easaccount":
        filters["easuid"] = ("easaccount_id", account_id)
        filters["easfoldersyncstatus"] = ("account_id", account_id)
    else:
        filters["imapuid"] = ("account_id", account_id)
        filters["imapfoldersyncstatus"] = ("account_id", account_id)
        filters["imapfolderinfo"] = ("account_id", account_id)

    from inbox.ignition import engine_manager

    # Bypass the ORM for performant bulk deletion;
    # we do /not/ want Transaction records created for these deletions,
    # so this is okay.
    engine = engine_manager.get_for_id(namespace_id)

    for cls in filters:
        _batch_delete(engine,
                      cls,
                      filters[cls],
                      account_id,
                      throttle=throttle,
                      dry_run=dry_run)

    # Use a single delete for the other tables. Rows from tables which contain
    # cascade-deleted foreign keys to other tables deleted here (or above)
    # are also not always explicitly deleted, except where needed for
    # performance.
    #
    # NOTE: Namespace, Account are deleted at the end too.

    query = "DELETE FROM {} WHERE {}={};"

    filters = OrderedDict()
    for table in ("category", "calendar"):
        filters[table] = ("namespace_id", namespace_id)
    for table in ("folder", "label"):
        filters[table] = ("account_id", account_id)
    filters["namespace"] = ("id", namespace_id)

    for table, (column, id_) in iteritems(filters):
        log.info("Performing bulk deletion", table=table)
        start = time.time()

        if throttle:
            bulk_throttle()

        if not dry_run:
            engine.execute(query.format(table, column, id_))
        else:
            log.debug(query.format(table, column, id_))

        end = time.time()
        log.info("Completed bulk deletion", table=table, time=end - start)

    # Delete the account object manually to get rid of the various objects
    # associated with it (e.g: secrets, tokens, etc.)
    with session_scope(account_id) as db_session:
        account = db_session.query(Account).get(account_id)
        if dry_run is False:
            db_session.delete(account)
            db_session.commit()

    # Delete liveness data (heartbeats)
    log.debug("Deleting liveness data", account_id=account_id)
    clear_heartbeat_status(account_id)

    statsd_client.timing("mailsync.account_deletion.queue.deleted",
                         time.time() - start_time)
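A minimal sketch of how a driver script in the spirit of bin/delete-account-data might invoke this (the argument parsing is illustrative; the real script may differ):

# Illustrative CLI wrapper; delete_namespace itself refuses to run unless the
# account is already marked for deletion, so this stays a two-step operation.
import argparse

def main():
    parser = argparse.ArgumentParser(
        description='Delete all data for a namespace. USE WITH CAUTION.')
    parser.add_argument('namespace_id', type=int)
    parser.add_argument('--throttle', action='store_true')
    parser.add_argument('--dry-run', action='store_true')
    args = parser.parse_args()

    delete_namespace(args.namespace_id, throttle=args.throttle,
                     dry_run=args.dry_run)

if __name__ == '__main__':
    main()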
Example #42
0
    def start_hook(self, hook_public_id):
        with session_scope() as db_session:
            hook = db_session.query(Webhook). \
                filter_by(public_id=hook_public_id).one()
            self._start_hook(hook, db_session)
Example #43
0
    def _report_initial_sync_end(self):
        with session_scope(self.namespace_id) as db_session:
            q = db_session.query(Folder).get(self.folder_id)
            q.initial_sync_end = datetime.utcnow()
Example #44
0
    def _get_access_token(self):
        with session_scope() as db_session:
            acc = db_session.query(Account).get(self.account_id)
            # This will raise OAuthError if OAuth access was revoked. The
            # BaseSyncMonitor loop will catch this, clean up, and exit.
            return token_manager.get_token(acc)
Example #45
0
    def _run_impl(self):
        old_state = self.state
        try:
            self.state = self.state_handlers[old_state]()
            self.heartbeat_status.publish(state=self.state)
        except UidInvalid:
            self.state = self.state + " uidinvalid"
            self.uidinvalid_count += 1
            self.heartbeat_status.publish(state=self.state)

            # Check that we're not stuck in an endless uidinvalidity resync loop.
            if self.uidinvalid_count > MAX_UIDINVALID_RESYNCS:
                log.error(
                    "Resynced more than MAX_UIDINVALID_RESYNCS in a"
                    " row. Stopping sync.",
                    folder_name=self.folder_name,
                )

                # Only stop syncing the entire account if the INBOX folder is
                # failing. Otherwise simply stop syncing the folder.
                if self.folder_name.lower() == "inbox":
                    with session_scope(self.namespace_id) as db_session:
                        account = db_session.query(Account).get(
                            self.account_id)
                        account.disable_sync("Detected endless uidvalidity "
                                             "resync loop")
                        account.sync_state = "stopped"
                        db_session.commit()
                    raise MailsyncDone()
                else:
                    self.state = "finish"
                    self.heartbeat_status.publish(state=self.state)

        except FolderMissingError:
            # Folder was deleted by monitor while its sync was running.
            # TODO: Monitor should handle shutting down the folder engine.
            log.info(
                "Folder disappeared. Stopping sync.",
                account_id=self.account_id,
                folder_id=self.folder_id,
            )
            raise MailsyncDone()
        except ValidationError as exc:
            log.error(
                "Error authenticating; stopping sync",
                exc_info=True,
                account_id=self.account_id,
                folder_id=self.folder_id,
                logstash_tag="mark_invalid",
            )
            with session_scope(self.namespace_id) as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.mark_invalid()
                account.update_sync_error(exc)
            raise MailsyncDone()

        # State handlers are idempotent, so it's okay if we're
        # killed between the end of the handler and the commit.
        if self.state != old_state:

            def update(status):
                status.state = self.state

            self.update_folder_sync_status(update)

        if self.state == old_state and self.state in ["initial", "poll"]:
            # We've been through a normal state transition without raising any
            # error. It's safe to reset the uidvalidity counter.
            self.uidinvalid_count = 0
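A hedged sketch of the outer loop that would drive these handlers; the real run loop is not shown in these examples and may differ:

# Illustration of how the state handlers compose into a sync loop. Each call
# to _run_impl() runs the handler for the current state and stores the next
# state; 'finish' maps to a handler that just returns 'finish', so the loop
# terminates there.
def drive(engine):
    if engine.state is None:
        engine.state = 'initial'
    while engine.state != 'finish':
        engine._run_impl()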
Example #46
0
def delete_marked_accounts(shard_id, throttle=False, dry_run=False):
    start = time.time()
    deleted_count = 0
    ids_to_delete = []

    with session_scope_by_shard_id(shard_id) as db_session:
        ids_to_delete = [(acc.id, acc.namespace.id)
                         for acc in db_session.query(Account)
                         if acc.is_deleted]

    queue_size = len(ids_to_delete)
    for account_id, namespace_id in ids_to_delete:
        # Report how much of the queue is left: queue_size is computed once
        # before the loop (the length of ids_to_delete doesn't change while we
        # iterate), so we subtract the number of accounts deleted so far.
        statsd_client.gauge(
            'mailsync.{}.account_deletion.queue.length'.format(shard_id),
            queue_size - deleted_count)
        try:
            with session_scope(namespace_id) as db_session:
                account = db_session.query(Account).get(account_id)
                if not account:
                    log.critical('Account does not exist',
                                 account_id=account_id)
                    continue

                if account.sync_should_run or not account.is_deleted:
                    log.warn(
                        'Account NOT marked for deletion. '
                        'Will not delete',
                        account_id=account_id)
                    continue

            log.info('Deleting account', account_id=account_id)
            start_time = time.time()
            # Delete data in database
            try:
                log.info('Deleting database data', account_id=account_id)
                delete_namespace(account_id,
                                 namespace_id,
                                 throttle=throttle,
                                 dry_run=dry_run)
            except Exception as e:
                log.critical('Database data deletion failed',
                             error=e,
                             account_id=account_id)
                continue

            # Delete liveness data
            log.debug('Deleting liveness data', account_id=account_id)
            clear_heartbeat_status(account_id)
            deleted_count += 1
            statsd_client.incr('mailsync.account_deletion.queue.deleted', 1)
            statsd_client.timing('mailsync.account_deletion.queue.deleted',
                                 time.time() - start_time)
        except Exception:
            log_uncaught_errors(log, account_id=account_id)

    end = time.time()
    log.info('All data deleted successfully',
             shard_id=shard_id,
             time=end - start,
             count=deleted_count)
Example #47
0
def delete_namespace(account_id, namespace_id, dry_run=False):
    """
    Delete all the data associated with a namespace from the database.
    USE WITH CAUTION.

    NOTE: This function is only called from bin/delete-account-data.
    It prints to stdout.

    """
    from inbox.models.session import session_scope
    from inbox.models import Account
    from inbox.ignition import engine_manager

    # Bypass the ORM for performant bulk deletion;
    # we do /not/ want Transaction records created for these deletions,
    # so this is okay.
    engine = engine_manager.get_for_id(namespace_id)

    # Chunk the deletes for tables that might see a large concurrent write
    # volume, to prevent those transactions from blocking.
    # NOTE: ImapFolderInfo does not fall into this category, but we include it
    # here for simplicity.

    filters = OrderedDict()

    for table in [
            'message', 'block', 'thread', 'transaction', 'actionlog',
            'contact', 'event', 'dataprocessingcache'
    ]:
        filters[table] = ('namespace_id', namespace_id)

    with session_scope(namespace_id) as db_session:
        account = db_session.query(Account).get(account_id)
        if account.discriminator != 'easaccount':
            filters['imapuid'] = ('account_id', account_id)
            filters['imapfoldersyncstatus'] = ('account_id', account_id)
            filters['imapfolderinfo'] = ('account_id', account_id)
        else:
            filters['easuid'] = ('easaccount_id', account_id)
            filters['easfoldersyncstatus'] = ('account_id', account_id)

    for cls in filters:
        _batch_delete(engine, cls, filters[cls], dry_run=dry_run)

    # Use a single delete for the other tables. Rows from tables which contain
    # cascade-deleted foreign keys to other tables deleted here (or above)
    # are also not always explicitly deleted, except where needed for
    # performance.
    #
    # NOTE: Namespace, Account are deleted at the end too.

    query = 'DELETE FROM {} WHERE {}={};'

    filters = OrderedDict()
    for table in ('category', 'calendar'):
        filters[table] = ('namespace_id', namespace_id)
    for table in ('folder', 'label'):
        filters[table] = ('account_id', account_id)
    filters['namespace'] = ('id', namespace_id)

    for table, (column, id_) in filters.iteritems():
        print 'Performing bulk deletion for table: {}'.format(table)
        start = time.time()

        if not dry_run:
            engine.execute(query.format(table, column, id_))
        else:
            print query.format(table, column, id_)

        end = time.time()
        print 'Completed bulk deletion for table: {}, time taken: {}'.\
            format(table, end - start)

    # Delete the account object manually to get rid of the various objects
    # associated with it (e.g: secrets, tokens, etc.)
    with session_scope(account_id) as db_session:
        account = db_session.query(Account).get(account_id)
        if dry_run is False:
            db_session.delete(account)
            db_session.commit()
Example #48
0
def condstore_base_poll(crispin_client, log, folder_name, shared_state,
                        highestmodseq_fn):
    """ Base polling logic for IMAP servers which support CONDSTORE and IDLE.

    The CONDSTORE / HIGHESTMODSEQ mechanism is used to detect new and changed
    messages that need syncing.

    """
    log.bind(state='poll')

    with session_scope(ignore_soft_deletes=False) as db_session:
        saved_folder_info = account.get_folder_info(crispin_client.account_id,
                                                    db_session, folder_name)

        saved_highestmodseq = saved_folder_info.highestmodseq

    # Start a session since we're going to IDLE below anyway...
    # This also resets the folder name cache, which we want in order to
    # detect folder/label additions and deletions.
    status = crispin_client.select_folder(
        folder_name, uidvalidity_cb(crispin_client.account_id))

    log.debug(current_modseq=status['HIGHESTMODSEQ'],
              saved_modseq=saved_highestmodseq)

    if status['HIGHESTMODSEQ'] > saved_highestmodseq:
        with session_scope(ignore_soft_deletes=False) as db_session:
            acc = db_session.query(ImapAccount).get(crispin_client.account_id)
            save_folder_names(log, acc, crispin_client.folder_names(),
                              db_session)
        highestmodseq_update(crispin_client, log, folder_name,
                             saved_highestmodseq, highestmodseq_fn,
                             shared_state['syncmanager_lock'])

    # We really only want to IDLE on a folder for new messages. Idling on
    # `All Mail` won't tell us when messages are archived from the Inbox.
    if folder_name.lower() in IDLE_FOLDERS:
        status = crispin_client.select_folder(
            folder_name, uidvalidity_cb(crispin_client.account_id))

        idle_frequency = 1800  # 30min

        log.info('idling', timeout=idle_frequency)
        crispin_client.conn.idle()
        crispin_client.conn.idle_check(timeout=idle_frequency)

        # We could do something with the IDLE response here, but it's of
        # limited use because it reports sequence IDs instead of UIDs:
        # resp = c.idle_check(timeout=shared_state['poll_frequency'])
        # r = dict( EXISTS=[], EXPUNGE=[])
        # for msg_uid, cmd in resp:
        #     r[cmd].append(msg_uid)
        # print r

        crispin_client.conn.idle_done()
        log.info('IDLE triggered poll')
    else:
        log.info('IDLE sleeping', seconds=shared_state['poll_frequency'])
        sleep(shared_state['poll_frequency'])

    return 'poll'
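For reference, the raw IMAPClient calls this wrapper builds on, sketched with placeholder host and credentials:

# Placeholder host/credentials; shows the underlying IMAPClient calls used
# above (folder_status for HIGHESTMODSEQ, then idle/idle_check/idle_done).
from imapclient import IMAPClient

conn = IMAPClient('imap.example.com', ssl=True)
conn.login('user@example.com', 'password')

status = conn.folder_status('INBOX', ['HIGHESTMODSEQ'])
print 'server HIGHESTMODSEQ:', status['HIGHESTMODSEQ']

conn.select_folder('INBOX')
conn.idle()
responses = conn.idle_check(timeout=1800)   # block for up to 30 minutes
conn.idle_done()
print 'untagged IDLE responses:', responses
conn.logout()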
Example #49
0
def upgrade():
    easupdate = False

    print 'Creating new tables and columns...'
    op.create_table(
        'folder', sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('account_id', sa.Integer(), nullable=False),
        sa.Column('name',
                  sa.String(length=191, collation='utf8mb4_general_ci'),
                  nullable=True),
        sa.ForeignKeyConstraint(['account_id'], ['account.id'],
                                ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint('account_id', 'name'))
    op.create_table(
        'internaltag', sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('public_id', mysql.BINARY(16), nullable=False),
        sa.Column('namespace_id', sa.Integer(), nullable=False),
        sa.Column('name', sa.String(length=191), nullable=False),
        sa.Column('thread_id', sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(['namespace_id'], ['namespace.id'],
                                ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['thread_id'], ['thread.id'],
                                ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint('namespace_id', 'name'))
    op.add_column('folderitem',
                  sa.Column('folder_id', sa.Integer(), nullable=True))
    op.create_foreign_key("fk_folder_id",
                          "folderitem",
                          "folder", ["folder_id"], ["id"],
                          ondelete='CASCADE')

    op.add_column('account',
                  sa.Column('inbox_folder_id', sa.Integer, nullable=True))
    op.add_column('account',
                  sa.Column('sent_folder_id', sa.Integer, nullable=True))
    op.add_column('account',
                  sa.Column('drafts_folder_id', sa.Integer, nullable=True))
    op.add_column('account',
                  sa.Column('spam_folder_id', sa.Integer, nullable=True))
    op.add_column('account',
                  sa.Column('trash_folder_id', sa.Integer, nullable=True))
    op.add_column('account',
                  sa.Column('archive_folder_id', sa.Integer, nullable=True))
    op.add_column('account',
                  sa.Column('all_folder_id', sa.Integer, nullable=True))
    op.add_column('account',
                  sa.Column('starred_folder_id', sa.Integer, nullable=True))
    op.create_foreign_key('account_ibfk_2', 'account', 'folder',
                          ['inbox_folder_id'], ['id'])
    op.create_foreign_key('account_ibfk_3', 'account', 'folder',
                          ['sent_folder_id'], ['id'])
    op.create_foreign_key('account_ibfk_4', 'account', 'folder',
                          ['drafts_folder_id'], ['id'])
    op.create_foreign_key('account_ibfk_5', 'account', 'folder',
                          ['spam_folder_id'], ['id'])
    op.create_foreign_key('account_ibfk_6', 'account', 'folder',
                          ['trash_folder_id'], ['id'])
    op.create_foreign_key('account_ibfk_7', 'account', 'folder',
                          ['archive_folder_id'], ['id'])
    op.create_foreign_key('account_ibfk_8', 'account', 'folder',
                          ['all_folder_id'], ['id'])
    op.create_foreign_key('account_ibfk_9', 'account', 'folder',
                          ['starred_folder_id'], ['id'])

    op.add_column('imapuid', sa.Column('folder_id', sa.Integer, nullable=True))
    op.create_foreign_key('imapuid_ibfk_3', 'imapuid', 'folder', ['folder_id'],
                          ['id'])

    from inbox.models.session import session_scope
    from inbox.ignition import main_engine
    engine = main_engine(pool_size=1, max_overflow=0)

    Base = declarative_base()
    Base.metadata.reflect(engine)

    if 'easuid' in Base.metadata.tables:
        easupdate = True
        print 'Adding new EASUid columns...'

        op.add_column('easuid',
                      sa.Column('fld_uid', sa.Integer(), nullable=True))

        op.add_column('easuid',
                      sa.Column('folder_id', sa.Integer(), nullable=True))

        op.create_foreign_key('easuid_ibfk_3', 'easuid', 'folder',
                              ['folder_id'], ['id'])

        op.create_unique_constraint(
            'uq_easuid_folder_id_msg_uid_easaccount_id', 'easuid',
            ['folder_id', 'msg_uid', 'easaccount_id'])

        op.create_index('easuid_easaccount_id_folder_id', 'easuid',
                        ['easaccount_id', 'folder_id'])

    # Include our changes to the EASUid table:
    Base = declarative_base()
    Base.metadata.reflect(engine)

    class Folder(Base):
        __table__ = Base.metadata.tables['folder']
        account = relationship('Account',
                               foreign_keys='Folder.account_id',
                               backref='folders')

    class FolderItem(Base):
        __table__ = Base.metadata.tables['folderitem']
        folder = relationship('Folder', backref='threads', lazy='joined')

    class Thread(Base):
        __table__ = Base.metadata.tables['thread']
        folderitems = relationship('FolderItem',
                                   backref="thread",
                                   single_parent=True,
                                   cascade='all, delete, delete-orphan')
        namespace = relationship('Namespace', backref='threads')

    class Namespace(Base):
        __table__ = Base.metadata.tables['namespace']
        account = relationship('Account',
                               backref=backref('namespace', uselist=False))

    class Account(Base):
        __table__ = Base.metadata.tables['account']
        inbox_folder = relationship('Folder',
                                    foreign_keys='Account.inbox_folder_id')
        sent_folder = relationship('Folder',
                                   foreign_keys='Account.sent_folder_id')
        drafts_folder = relationship('Folder',
                                     foreign_keys='Account.drafts_folder_id')
        spam_folder = relationship('Folder',
                                   foreign_keys='Account.spam_folder_id')
        trash_folder = relationship('Folder',
                                    foreign_keys='Account.trash_folder_id')
        starred_folder = relationship('Folder',
                                      foreign_keys='Account.starred_folder_id')
        archive_folder = relationship('Folder',
                                      foreign_keys='Account.archive_folder_id')
        all_folder = relationship('Folder',
                                  foreign_keys='Account.all_folder_id')

    class ImapUid(Base):
        __table__ = Base.metadata.tables['imapuid']
        folder = relationship('Folder', backref='imapuids', lazy='joined')

    if easupdate:

        class EASUid(Base):
            __table__ = Base.metadata.tables['easuid']
            folder = relationship('Folder',
                                  foreign_keys='EASUid.folder_id',
                                  backref='easuids',
                                  lazy='joined')

    print 'Creating Folder rows and migrating FolderItems...'
    # not many folders per account, so shouldn't grow that big
    with session_scope(versioned=False,
                       ignore_soft_deletes=False) as db_session:
        folders = dict([((i.account_id, i.name), i)
                        for i in db_session.query(Folder).all()])
        count = 0
        for folderitem in db_session.query(FolderItem).join(Thread).join(
                Namespace).yield_per(CHUNK_SIZE):
            account_id = folderitem.thread.namespace.account_id
            if folderitem.thread.namespace.account.provider == 'gmail':
                if folderitem.folder_name in folder_name_subst_map:
                    new_folder_name = folder_name_subst_map[
                        folderitem.folder_name]
                else:
                    new_folder_name = folderitem.folder_name
            elif folderitem.thread.namespace.account.provider == 'eas':
                new_folder_name = folderitem.folder_name.title()

            if (account_id, new_folder_name) in folders:
                f = folders[(account_id, new_folder_name)]
            else:
                f = Folder(account_id=account_id, name=new_folder_name)
                folders[(account_id, new_folder_name)] = f
            folderitem.folder = f
            count += 1
            if count > CHUNK_SIZE:
                db_session.commit()
                count = 0
        db_session.commit()

        print 'Migrating ImapUids to reference Folder rows...'
        for imapuid in db_session.query(ImapUid).yield_per(CHUNK_SIZE):
            account_id = imapuid.imapaccount_id
            if imapuid.folder_name in folder_name_subst_map:
                new_folder_name = folder_name_subst_map[imapuid.folder_name]
            else:
                new_folder_name = imapuid.folder_name
            if (account_id, new_folder_name) in folders:
                f = folders[(account_id, new_folder_name)]
            else:
                f = Folder(account_id=account_id, name=new_folder_name)
                folders[(account_id, new_folder_name)] = f
            imapuid.folder = f
            count += 1
            if count > CHUNK_SIZE:
                db_session.commit()
                count = 0
        db_session.commit()

        if easupdate:
            print 'Migrating EASUids to reference Folder rows...'

            for easuid in db_session.query(EASUid).yield_per(CHUNK_SIZE):
                account_id = easuid.easaccount_id
                new_folder_name = easuid.folder_name

                if (account_id, new_folder_name) in folders:
                    f = folders[(account_id, new_folder_name)]
                else:
                    f = Folder(account_id=account_id, name=new_folder_name)
                    folders[(account_id, new_folder_name)] = f
                easuid.folder = f
                count += 1
                if count > CHUNK_SIZE:
                    db_session.commit()
                    count = 0
            db_session.commit()

        print 'Migrating *_folder_name fields to reference Folder rows...'
        for account in db_session.query(Account).filter_by(provider='gmail'):
            if account.inbox_folder_name:
                # hard replace INBOX with canonicalized caps
                k = (account.id, 'Inbox')
                if k in folders:
                    account.inbox_folder = folders[k]
                else:
                    account.inbox_folder = Folder(
                        account_id=account.id,
                        name=folder_name_subst_map[account.inbox_folder_name])
            if account.sent_folder_name:
                k = (account.id, account.sent_folder_name)
                if k in folders:
                    account.sent_folder = folders[k]
                else:
                    account.sent_folder = Folder(account_id=account.id,
                                                 name=account.sent_folder_name)
            if account.drafts_folder_name:
                k = (account.id, account.drafts_folder_name)
                if k in folders:
                    account.drafts_folder = folders[k]
                else:
                    account.drafts_folder = Folder(
                        account_id=account.id, name=account.drafts_folder_name)
            # all/archive mismatch is intentional; semantics have changed
            if account.archive_folder_name:
                k = (account.id, account.archive_folder_name)
                if k in folders:
                    account.all_folder = folders[k]
                else:
                    account.all_folder = Folder(
                        account_id=account.id,
                        name=account.archive_folder_name)
        db_session.commit()

        if easupdate:
            print "Migrating EAS accounts' *_folder_name fields to reference "\
                  "Folder rows..."

            for account in db_session.query(Account).filter_by(provider='eas'):
                if account.inbox_folder_name:
                    k = (account.id, account.inbox_folder_name)
                    if k in folders:
                        account.inbox_folder = folders[k]
                    else:
                        account.inbox_folder = Folder(
                            account_id=account.id,
                            name=account.inbox_folder_name)
                if account.sent_folder_name:
                    k = (account.id, account.sent_folder_name)
                    if k in folders:
                        account.sent_folder = folders[k]
                    else:
                        account.sent_folder = Folder(
                            account_id=account.id,
                            name=account.sent_folder_name)
                if account.drafts_folder_name:
                    k = (account.id, account.drafts_folder_name)
                    if k in folders:
                        account.drafts_folder = folders[k]
                    else:
                        account.drafts_folder = Folder(
                            account_id=account.id,
                            name=account.drafts_folder_name)
                if account.archive_folder_name:
                    k = (account.id, account.archive_folder_name)
                    if k in folders:
                        account.archive_folder = folders[k]
                    else:
                        account.archive_folder = Folder(
                            account_id=account.id,
                            name=account.archive_folder_name)
            db_session.commit()

    print 'Final schema tweaks and new constraint enforcement'
    op.alter_column('folderitem',
                    'folder_id',
                    existing_type=sa.Integer(),
                    nullable=False)
    op.drop_constraint('folder_name', 'folderitem', type_='unique')
    op.drop_constraint('folder_name', 'imapuid', type_='unique')
    op.create_unique_constraint('uq_imapuid_folder_id_msg_uid_imapaccount_id',
                                'imapuid',
                                ['folder_id', 'msg_uid', 'imapaccount_id'])
    op.drop_column('folderitem', 'folder_name')
    op.drop_column('imapuid', 'folder_name')
    op.drop_column('account', 'inbox_folder_name')
    op.drop_column('account', 'drafts_folder_name')
    op.drop_column('account', 'sent_folder_name')
    op.drop_column('account', 'archive_folder_name')

    if easupdate:
        print 'Dropping old EASUid columns...'

        op.drop_constraint('folder_name', 'easuid', type_='unique')
        op.drop_index('easuid_easaccount_id_folder_name', 'easuid')
        op.drop_column('easuid', 'folder_name')
Example #50
0
def check_new_uids(account_id, folder_name, log, uid_download_stack,
                   poll_frequency, syncmanager_lock):
    """ Check for new UIDs and add them to the download stack.

    We do this by comparing local UID lists to remote UID lists, maintaining
    the invariant that (stack uids)+(local uids) == (remote uids).

    We also remove local messages that have disappeared from the remote,
    since users may well be archiving mail while the initial sync is still
    running.

    We grab a new IMAP connection from the pool for this to isolate its
    actions from whatever the main greenlet may be doing.

    Runs until killed. (Intended to be run in a greenlet.)
    """
    log.info("starting new UID-check poller")
    with _pool(account_id).get() as crispin_client:
        crispin_client.select_folder(folder_name,
                                     uidvalidity_cb(crispin_client.account_id))
        while True:
            remote_uids = set(crispin_client.all_uids())
            # We lock this section to make sure no messages are being
            # created while we put the queue into a consistent state.
            with syncmanager_lock:
                log.debug("check_new_uids acquired syncmanager_lock")
                with session_scope(ignore_soft_deletes=False) as db_session:
                    local_uids = set(
                        account.all_uids(account_id, db_session, folder_name))
                    stack_uids = set(uid_download_stack.queue)
                    local_with_pending_uids = local_uids | stack_uids
                    deleted_uids = remove_deleted_uids(account_id, db_session,
                                                       log, folder_name,
                                                       local_uids, remote_uids)
                    log.info('removed deleted uids', count=len(deleted_uids))

                    # filter out messages that have disappeared on the
                    # remote side
                    new_uid_download_stack = {
                        u
                        for u in uid_download_stack.queue if u in remote_uids
                    }

                    # add in any new uids from the remote
                    for uid in remote_uids:
                        if uid not in local_with_pending_uids:
                            log.debug(
                                "adding new message {} to download queue".
                                format(uid))
                            new_uid_download_stack.add(uid)
                    uid_download_stack.queue = sorted(new_uid_download_stack,
                                                      key=int)

                    update_uid_counts(
                        db_session,
                        log,
                        crispin_client.account_id,
                        folder_name,
                        remote_uid_count=len(remote_uids),
                        download_uid_count=uid_download_stack.qsize(),
                        delete_uid_count=len(deleted_uids))

            sleep(poll_frequency)
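
The docstring above states the invariant this poller maintains: queued UIDs plus locally-stored UIDs should always equal the set of UIDs the remote reports. A minimal, self-contained sketch of that set arithmetic, with made-up UID values purely for illustration:

remote_uids = {1, 2, 3, 5, 8}   # what the IMAP server currently reports
local_uids = {1, 2, 4}          # UIDs already persisted locally
stack_uids = {3}                # UIDs queued for download

# UIDs that vanished on the remote get purged locally.
deleted_uids = local_uids - remote_uids                # {4}

# Drop queued UIDs the remote no longer has, then queue anything new.
still_queued = stack_uids & remote_uids                # {3}
brand_new = remote_uids - (local_uids | stack_uids)    # {5, 8}
new_stack = sorted(still_queued | brand_new, key=int)  # [3, 5, 8]

# Invariant after the refresh: (stack uids) + (local uids) == (remote uids).
assert set(new_stack) | (local_uids - deleted_uids) == remote_uids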
Example #51
0
    def _smtp_oauth2_try_refresh(self):
        with session_scope(self.account_id) as db_session:
            account = db_session.query(ImapAccount).get(self.account_id)
            self.auth_token = token_manager.get_token(
                account, force_refresh=True)
Example #52
0
def base_poll(account_id, provider_instance, last_sync_fn, target_obj,
              set_last_sync_fn):
    """Query a remote provider for updates and persist them to the
    database.

    Parameters
    ----------
    account_id: int
        ID for the account whose items should be queried.
    provider_instance: Interface to the remote item data provider.
        Must have a PROVIDER_NAME attribute and implement the get_items()
        method.
    last_sync_fn: function
        Given the account, returns the time of the last successful sync
        (or None if the account has never been synced).
    target_obj: database model class for the items being synced.
    set_last_sync_fn: function
        Given the account, records that a sync has just completed.
    """

    log = logger.new(account_id=account_id)
    provider_name = provider_instance.PROVIDER_NAME
    with session_scope() as db_session:
        account = db_session.query(Account).get(account_id)
        last_sync = or_none(last_sync_fn(account),
                            datetime.datetime.isoformat)

    items = provider_instance.get_items(last_sync)
    with session_scope() as db_session:
        account = db_session.query(Account).get(account_id)
        change_counter = Counter()
        to_commit = []
        for item in items:
            item.namespace = account.namespace
            assert item.uid is not None, \
                'Got remote item with null uid'
            assert isinstance(item.uid, str)

            matching_items = db_session.query(target_obj).filter(
                target_obj.namespace == account.namespace,
                target_obj.provider_name == provider_name,
                target_obj.uid == item.uid)
            # Snapshot of item data from immediately after last sync:
            cached_item = matching_items. \
                filter(target_obj.source == 'remote').first()

            # Item data reflecting any local modifications since the last
            # sync with the remote provider:
            local_item = matching_items. \
                filter(target_obj.source == 'local').first()
            # If the remote item was deleted, purge the corresponding
            # database entries.
            if item.deleted:
                if cached_item is not None:
                    db_session.delete(cached_item)
                    change_counter['deleted'] += 1
                if local_item is not None:
                    db_session.delete(local_item)
                continue
            # Otherwise, update the database.
            if cached_item is not None:
                # The provider gave an update to an item we already have.
                if local_item is not None:
                    try:
                        # Attempt to merge remote updates into local_item
                        local_item.merge_from(cached_item, item)
                        # And update cached_item to reflect both local and
                        # remote updates
                        cached_item.copy_from(local_item)

                    except MergeError:
                        log.error('Conflicting local and remote updates '
                                  'to item.',
                                  local=local_item, cached=cached_item,
                                  remote=item)
                        # For now, just don't update if conflicting.
                        continue
                else:
                    log.warning('Item already present as remote but not '
                                'local item', cached_item=cached_item)
                    cached_item.copy_from(item)
                change_counter['updated'] += 1
            else:
                # This is a new item, create both local and remote DB
                # entries.
                local_item = target_obj()
                local_item.copy_from(item)
                local_item.source = 'local'
                to_commit.append(item)
                to_commit.append(local_item)
                change_counter['added'] += 1

        set_last_sync_fn(account)

        log.info('sync', added=change_counter['added'],
                 updated=change_counter['updated'],
                 deleted=change_counter['deleted'])

        db_session.add_all(to_commit)
        db_session.commit()
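
Example #52 keeps two database rows per synced item: a 'remote' row that snapshots what the provider last sent, and a 'local' row that may carry local edits; merge_from() reconciles both against the incoming update. A hedged, stand-alone sketch of that three-way merge over plain dicts; the merge() helper below is purely illustrative, not the real merge_from() implementation:

def merge(cached, local, incoming):
    """Toy three-way merge: `cached` is the last-synced snapshot, `local`
    may carry local edits, `incoming` is the fresh remote version."""
    merged = {}
    for key in set(cached) | set(local) | set(incoming):
        base, ours, theirs = cached.get(key), local.get(key), incoming.get(key)
        if ours == base:
            merged[key] = theirs    # no local edit, so take the remote value
        elif theirs == base:
            merged[key] = ours      # remote unchanged, so keep the local edit
        elif ours == theirs:
            merged[key] = ours      # both sides made the same change
        else:
            # Mirrors the MergeError case above: conflicting edits.
            raise ValueError('conflicting edits for {!r}'.format(key))
    return merged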
Example #53
0
def highestmodseq_update(crispin_client, log, folder_name, last_highestmodseq,
                         highestmodseq_fn, syncmanager_lock):
    account_id = crispin_client.account_id
    new_highestmodseq = crispin_client.selected_highestmodseq
    new_uidvalidity = crispin_client.selected_uidvalidity
    log.info('starting highestmodseq update',
             current_highestmodseq=new_highestmodseq)
    changed_uids = crispin_client.new_and_updated_uids(last_highestmodseq)
    remote_uids = crispin_client.all_uids()

    local_uids = None
    if changed_uids:
        with session_scope(ignore_soft_deletes=False) as db_session:
            local_uids = account.all_uids(account_id, db_session, folder_name)

        new, updated = new_or_updated(changed_uids, local_uids)
        log.info(new_uid_count=len(new), updated_uid_count=len(updated))

        local_uids += new
        with syncmanager_lock:
            log.debug("highestmodseq_update acquired syncmanager_lock")
            with session_scope(ignore_soft_deletes=False) as db_session:
                deleted_uids = remove_deleted_uids(account_id, db_session, log,
                                                   folder_name, local_uids,
                                                   remote_uids)

        local_uids = set(local_uids) - deleted_uids
        update_metadata(crispin_client, log, folder_name, updated,
                        syncmanager_lock)

        with session_scope(ignore_soft_deletes=False) as db_session:
            update_uid_counts(db_session,
                              log,
                              account_id,
                              folder_name,
                              remote_uid_count=len(remote_uids),
                              download_uid_count=len(new),
                              update_uid_count=len(updated),
                              delete_uid_count=len(deleted_uids))

        highestmodseq_fn(crispin_client, log, folder_name, new, updated,
                         syncmanager_lock)
    else:
        log.info("No new or updated messages")

    with session_scope(ignore_soft_deletes=False) as db_session:
        with syncmanager_lock:
            log.debug("highestmodseq_update acquired syncmanager_lock")
            if local_uids is None:
                local_uids = account.all_uids(account_id, db_session,
                                              folder_name)
            deleted_uids = remove_deleted_uids(crispin_client.account_id,
                                               db_session, log, folder_name,
                                               local_uids, remote_uids)
        update_uid_counts(db_session,
                          log,
                          account_id,
                          folder_name,
                          remote_uid_count=len(remote_uids),
                          delete_uid_count=len(deleted_uids))
        account.update_folder_info(account_id, db_session, folder_name,
                                   new_uidvalidity, new_highestmodseq)
        db_session.commit()
Example #54
0
def syncback_worker(semaphore,
                    action,
                    action_log_id,
                    record_id,
                    account_id,
                    syncback_service,
                    retry_interval=30,
                    extra_args=None):
    func = ACTION_FUNCTION_MAP[action]

    with semaphore:
        log = logger.new(record_id=record_id,
                         action_log_id=action_log_id,
                         action=func,
                         account_id=account_id,
                         extra_args=extra_args)
        # Not ignoring soft-deleted objects here because if you, say,
        # delete a draft, we still need to access the object to delete it
        # on the remote.
        try:
            with session_scope(ignore_soft_deletes=False) as db_session:
                if extra_args:
                    func(account_id, record_id, db_session, extra_args)
                else:
                    func(account_id, record_id, db_session)
                action_log_entry = db_session.query(ActionLog).get(
                    action_log_id)
                action_log_entry.status = 'successful'
                db_session.commit()
                latency = round((datetime.utcnow() -
                                 action_log_entry.created_at).total_seconds(),
                                2)
                log.info('syncback action completed',
                         action_id=action_log_id,
                         latency=latency)
                syncback_service.remove_from_schedule(action_log_id)
        except Exception as e:
            # To reduce error-reporting noise, don't ship to Sentry
            # if not actionable.
            if isinstance(e, ProviderSpecificException):
                log.warning('Uncaught error', exc_info=True)
            else:
                log_uncaught_errors(log, account_id=account_id)

            with session_scope() as db_session:
                action_log_entry = db_session.query(ActionLog).get(
                    action_log_id)
                action_log_entry.retries += 1

                if action_log_entry.retries == ACTION_MAX_NR_OF_RETRIES:
                    log.critical('Max retries reached, giving up.',
                                 action_id=action_log_id,
                                 account_id=account_id,
                                 exc_info=True)
                    action_log_entry.status = 'failed'
                db_session.commit()

            # Wait for a bit before retrying
            gevent.sleep(retry_interval)

            # Remove the entry from the scheduled set so that it can be
            # retried or given up on.
            syncback_service.remove_from_schedule(action_log_id)

            # Again, don't raise on exceptions that require
            # provider-specific handling e.g. EAS
            if not isinstance(e, ProviderSpecificException):
                raise
Example #55
0
    def _smtp_oauth2_try_refresh(self):
        with session_scope() as db_session:
            account = db_session.query(ImapAccount).get(self.account_id)
            self.auth_token = account.renew_access_token()
Example #56
0
def sync_deltas():
    g.parser.add_argument('cursor',
                          type=valid_public_id,
                          location='args',
                          required=True)
    g.parser.add_argument('exclude_types',
                          type=valid_delta_object_types,
                          location='args')
    g.parser.add_argument('include_types',
                          type=valid_delta_object_types,
                          location='args')
    g.parser.add_argument('timeout',
                          type=int,
                          default=LONG_POLL_REQUEST_TIMEOUT,
                          location='args')
    # TODO(emfree): should support `expand` parameter in delta endpoints.
    args = strict_parse_args(g.parser, request.args)
    exclude_types = args.get('exclude_types')
    include_types = args.get('include_types')
    cursor = args['cursor']
    timeout = args['timeout']

    if include_types and exclude_types:
        return err(
            400, "Invalid Request. Cannot specify both include_types"
            "and exclude_types")

    if cursor == '0':
        start_pointer = 0
    else:
        try:
            start_pointer, = g.db_session.query(Transaction.id). \
                filter(Transaction.public_id == cursor,
                       Transaction.namespace_id == g.namespace.id).one()
        except NoResultFound:
            raise InputError('Invalid cursor parameter')

    # The client wants us to wait until there are changes
    g.db_session.close()  # hack to close the flask session
    poll_interval = 1

    start_time = time.time()
    while time.time() - start_time < timeout:
        with session_scope() as db_session:
            deltas, _ = delta_sync.format_transactions_after_pointer(
                g.namespace, start_pointer, db_session, args['limit'],
                exclude_types, include_types)

        response = {
            'cursor_start': cursor,
            'deltas': deltas,
        }
        if deltas:
            response['cursor_end'] = deltas[-1]['cursor']
            return g.encoder.jsonify(response)

        # No changes yet; perhaps wait
        elif '/delta/longpoll' in request.url_rule.rule:
            gevent.sleep(poll_interval)
        else:  # Return immediately
            response['cursor_end'] = cursor
            return g.encoder.jsonify(response)

    # If nothing happens before the timeout, just return the original cursor
    response['cursor_end'] = cursor
    return g.encoder.jsonify(response)
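
The loop above is a standard long-poll: fetch deltas, return immediately if there are any, otherwise sleep and retry until the client-supplied timeout elapses. A stripped-down sketch of just that control flow, with a hypothetical fetch_changes() callable standing in for the delta query:

import time


def long_poll(fetch_changes, timeout, poll_interval=1):
    """Return changes as soon as any appear, or [] once `timeout` expires."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        changes = fetch_changes()
        if changes:
            return changes
        time.sleep(poll_interval)   # nothing yet; wait before polling again
    return []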
Example #57
0
def upgrade():
    from inbox.models.session import session_scope
    from inbox.models.folder import Folder
    from inbox.sqlalchemy_ext.util import JSON
    from inbox.ignition import main_engine
    engine = main_engine(pool_size=1, max_overflow=0)

    ### foldersync => imapfoldersyncstatus
    # note that renaming a table does in fact migrate constraints + indexes too
    op.rename_table('foldersync', 'imapfoldersyncstatus')

    op.alter_column('imapfoldersyncstatus',
                    '_sync_status',
                    existing_type=JSON(),
                    nullable=True,
                    new_column_name='_metrics')

    op.add_column('imapfoldersyncstatus',
                  sa.Column('folder_id', sa.Integer(), nullable=False))

    ### uidvalidity => imapfolderinfo
    op.rename_table('uidvalidity', 'imapfolderinfo')
    op.alter_column('imapfolderinfo',
                    'uid_validity',
                    existing_type=sa.Integer(),
                    nullable=False,
                    new_column_name='uidvalidity')
    op.alter_column('imapfolderinfo',
                    'highestmodseq',
                    existing_type=sa.Integer(),
                    nullable=True)

    op.drop_constraint('imapfolderinfo_ibfk_1',
                       'imapfolderinfo',
                       type_='foreignkey')
    op.alter_column('imapfolderinfo',
                    'imapaccount_id',
                    existing_type=sa.Integer(),
                    nullable=False,
                    new_column_name='account_id')
    op.create_foreign_key('imapfolderinfo_ibfk_1', 'imapfolderinfo',
                          'imapaccount', ['account_id'], ['id'])

    op.add_column('imapfolderinfo',
                  sa.Column('folder_id', sa.Integer(), nullable=False))

    ### imapuid
    op.drop_constraint('imapuid_ibfk_1', 'imapuid', type_='foreignkey')
    op.alter_column('imapuid',
                    'imapaccount_id',
                    existing_type=sa.Integer(),
                    nullable=False,
                    new_column_name='account_id')
    op.create_foreign_key('imapuid_ibfk_1', 'imapuid', 'imapaccount',
                          ['account_id'], ['id'])

    ### migrate data and add new constraints
    Base = sa.ext.declarative.declarative_base()
    Base.metadata.reflect(engine)

    if 'easfoldersync' in Base.metadata.tables:
        op.rename_table('easfoldersync', 'easfoldersyncstatus')
        op.add_column('easfoldersyncstatus',
                      sa.Column('folder_id', sa.Integer(), nullable=False))
        op.alter_column('easfoldersyncstatus',
                        '_sync_status',
                        existing_type=JSON(),
                        nullable=True,
                        new_column_name='_metrics')
        Base.metadata.reflect(engine)

        class EASFolderSyncStatus(Base):
            __table__ = Base.metadata.tables['easfoldersyncstatus']

    class ImapFolderSyncStatus(Base):
        __table__ = Base.metadata.tables['imapfoldersyncstatus']

    class ImapFolderInfo(Base):
        __table__ = Base.metadata.tables['imapfolderinfo']

    with session_scope(versioned=False, ignore_soft_deletes=False) \
            as db_session:
        folder_id_for = dict([((account_id, name.lower()), id_)
                              for id_, account_id, name in db_session.query(
                                  Folder.id, Folder.account_id, Folder.name)])
        for status in db_session.query(ImapFolderSyncStatus):
            print "migrating", status.folder_name
            status.folder_id = folder_id_for[(status.account_id,
                                              status.folder_name.lower())]
        db_session.commit()
        if 'easfoldersyncstatus' in Base.metadata.tables:
            for status in db_session.query(EASFolderSyncStatus):
                print "migrating", status.folder_name
                folder_id = folder_id_for.get(
                    (status.account_id, status.folder_name.lower()))
                if folder_id is not None:
                    status.folder_id = folder_id
                else:
                    # EAS folder rows *may* not exist if they have no messages
                    folder = Folder(account_id=status.account_id,
                                    name=status.folder_name)
                    db_session.add(folder)
                    db_session.commit()
                    status.folder_id = folder.id
            db_session.commit()
            # some weird alembic bug? need to drop and recreate this FK
            op.drop_constraint('easfoldersyncstatus_ibfk_1',
                               'easfoldersyncstatus',
                               type_='foreignkey')
            op.drop_column('easfoldersyncstatus', 'folder_name')
            op.create_foreign_key('easfoldersyncstatus_ibfk_1',
                                  'easfoldersyncstatus', 'easaccount',
                                  ['account_id'], ['id'])
            op.create_foreign_key('easfoldersyncstatus_ibfk_2',
                                  'easfoldersyncstatus', 'folder',
                                  ['folder_id'], ['id'])
            op.create_unique_constraint('account_id', 'easfoldersyncstatus',
                                        ['account_id', 'folder_id'])

    # some weird alembic bug? need to drop and recreate this FK
    op.drop_constraint('imapfoldersyncstatus_ibfk_1',
                       'imapfoldersyncstatus',
                       type_='foreignkey')
    op.drop_constraint('account_id', 'imapfoldersyncstatus', type_='unique')
    op.drop_column('imapfoldersyncstatus', 'folder_name')
    op.create_foreign_key('imapfoldersyncstatus_ibfk_1',
                          'imapfoldersyncstatus', 'imapaccount',
                          ['account_id'], ['id'])
    op.create_foreign_key('imapfoldersyncstatus_ibfk_2',
                          'imapfoldersyncstatus', 'folder', ['folder_id'],
                          ['id'])
    op.create_unique_constraint('account_id', 'imapfoldersyncstatus',
                                ['account_id', 'folder_id'])

    with session_scope(versioned=False, ignore_soft_deletes=False) \
            as db_session:
        for info in db_session.query(ImapFolderInfo):
            print "migrating", info.folder_name
            info.folder_id = folder_id_for[(info.account_id,
                                            info.folder_name.lower())]
        db_session.commit()

    # some weird alembic bug? need to drop and recreate this FK
    op.drop_constraint('imapfolderinfo_ibfk_1',
                       'imapfolderinfo',
                       type_='foreignkey')
    op.drop_constraint('imapaccount_id', 'imapfolderinfo', type_='unique')
    op.drop_column('imapfolderinfo', 'folder_name')
    op.create_foreign_key('imapfolderinfo_ibfk_1', 'imapfolderinfo',
                          'imapaccount', ['account_id'], ['id'])
    op.create_foreign_key('imapfolderinfo_ibfk_2', 'imapfolderinfo', 'folder',
                          ['folder_id'], ['id'])
    op.create_unique_constraint('imapaccount_id', 'imapfolderinfo',
                                ['account_id', 'folder_id'])
Example #58
0
def upgrade():
    import datetime
    from sqlalchemy.ext.declarative import declarative_base
    from sqlalchemy.orm import relationship
    from inbox.config import config
    from inbox.models.session import session_scope
    from inbox.ignition import main_engine
    engine = main_engine()

    now = datetime.datetime.now()
    Base = declarative_base()
    Base.metadata.reflect(engine)

    class GmailAccount(Base):
        __table__ = Base.metadata.tables['gmailaccount']

    class Secret(Base):
        __table__ = Base.metadata.tables['secret']

    class GmailAuthCredentials(Base):
        __table__ = Base.metadata.tables['gmailauthcredentials']
        secret = relationship(Secret)

    with session_scope(versioned=False) as db_session:

        for acc, sec in db_session.query(GmailAccount, Secret) \
                        .filter(GmailAccount.refresh_token_id == Secret.id,
                                GmailAccount.scope != None,
                                GmailAccount.g_id_token != None) \
                        .all():

            # Create a new GmailAuthCredentials entry if
            # we don't have one already
            if db_session.query(GmailAuthCredentials, Secret) \
                    .filter(GmailAuthCredentials.gmailaccount_id == acc.id) \
                    .filter(Secret._secret == sec._secret) \
                    .count() == 0:

                # Create a new secret
                new_sec = Secret()
                new_sec.created_at = now
                new_sec.updated_at = now
                new_sec._secret = sec._secret
                new_sec.type = sec.type  # 'token'
                new_sec.encryption_scheme = sec.encryption_scheme

                # Create a new GmailAuthCredentials entry
                auth_creds = GmailAuthCredentials()
                auth_creds.gmailaccount_id = acc.id
                auth_creds.scopes = acc.scope
                auth_creds.g_id_token = acc.g_id_token
                auth_creds.created_at = now
                auth_creds.updated_at = now
                auth_creds.secret = new_sec

                auth_creds.client_id = \
                    (acc.client_id or
                     config.get_required('GOOGLE_OAUTH_CLIENT_ID'))

                auth_creds.client_secret = \
                    (acc.client_secret or
                     config.get_required('GOOGLE_OAUTH_CLIENT_SECRET'))

                db_session.add(auth_creds)
                db_session.add(new_sec)

        db_session.commit()
Example #59
0
def populate():
    # Populate new classes from the existing data
    from inbox.models.event import (Event, RecurringEvent,
                                    RecurringEventOverride)
    from inbox.models.session import session_scope
    from inbox.events.util import parse_datetime
    from inbox.events.recurring import link_events

    with session_scope() as db:
        # Redo recurrence rule population, since we extended the column length
        print "Repopulating max-length recurrences...",
        for e in db.query(Event).filter(
                sa.func.length(Event.recurrence) > 250):
            try:
                raw_data = json.loads(e.raw_data)
            except:
                try:
                    raw_data = ast.literal_eval(e.raw_data)
                except:
                    print "Could not load raw data for event {}".format(e.id)
                    continue
            e.recurrence = raw_data['recurrence']
        db.commit()
        print "done."

        print "Updating types for Override...",
        # Slightly hacky way to convert types (only needed for one-off import)
        convert = """UPDATE event SET type='recurringeventoverride' WHERE
                     raw_data LIKE '%recurringEventId%'"""
        db.execute(convert)
        create = """INSERT INTO recurringeventoverride (id)
                    SELECT id FROM event
                    WHERE type='recurringeventoverride'
                    AND id NOT IN
                    (SELECT id FROM recurringeventoverride)"""
        try:
            db.execute(create)
        except Exception as e:
            print "Couldn't insert RecurringEventOverrides: {}".format(e)
            exit(2)
        print "done."

        c = 0
        print "Expanding Overrides .",
        query = db.query(RecurringEventOverride)
        for e in query:
            try:
                # Some raw data is str(dict), the rest is json.dumps
                raw_data = json.loads(e.raw_data)
            except:
                try:
                    raw_data = ast.literal_eval(e.raw_data)
                except:
                    print "Could not load raw data for event {}".format(e.id)
                    continue
            rec_uid = raw_data.get('recurringEventId')
            if rec_uid:
                e.master_event_uid = rec_uid
                ost = raw_data.get('originalStartTime')
                if ost:
                    # this is a dictionary with one value
                    start_time = ost.values().pop()
                    e.original_start_time = parse_datetime(start_time)
                # attempt to get the ID for the event, if we can, and
                # set the relationship appropriately
                if raw_data.get('status') == 'cancelled':
                    e.cancelled = True
                link_events(db, e)
                c += 1
                if c % 100 == 0:
                    print ".",
                    sys.stdout.flush()
        db.commit()
        print "done. ({} modified)".format(c)

        # Convert Event to RecurringEvent
        print "Updating types for RecurringEvent...",
        convert = """UPDATE event SET type='recurringevent' WHERE
                     recurrence IS NOT NULL"""
        db.execute(convert)
        create = """INSERT INTO recurringevent (id)
                    SELECT id FROM event
                    WHERE type='recurringevent'
                    AND id NOT IN
                    (SELECT id FROM recurringevent)"""
        try:
            db.execute(create)
        except Exception as e:
            print "Couldn't insert RecurringEvents: {}".format(e)
            exit(2)
        print "done."

        # Pull out recurrence metadata from recurrence
        c = 0
        print "Expanding master events .",
        query = db.query(RecurringEvent)
        for r in query:
            r.unwrap_rrule()
            try:
                raw_data = json.loads(r.raw_data)
            except:
                try:
                    raw_data = ast.literal_eval(r.raw_data)
                except:
                    print "Could not load raw data for event {}".format(r.id)
                    continue
            r.start_timezone = raw_data['start'].get('timeZone')
            # find any un-found overrides that didn't have masters earlier
            link_events(db, r)
            db.add(r)
            c += 1
            if c % 100 == 0:
                print ".",
                sys.stdout.flush()
        db.commit()
        print "done. ({} modified)".format(c)

        # Finally, convert all remaining Events to type='event'
        convert = """UPDATE event SET type='event' WHERE type IS NULL"""
        db.execute(convert)
Example #60
0
    def start_sync(self, account_id):
        """
        Starts a sync for the account with the given account_id.
        If that account doesn't exist, does nothing.

        """
        with self.semaphore, session_scope(account_id) as db_session:
            acc = db_session.query(Account).with_for_update().get(account_id)
            if acc is None:
                self.log.error("no such account", account_id=account_id)
                return False
            if not acc.sync_should_run:
                return False
            if (acc.desired_sync_host is not None
                    and acc.desired_sync_host != self.process_identifier):
                return False
            if acc.sync_host is not None and acc.sync_host != self.process_identifier:
                return False
            self.log.info("starting sync",
                          account_id=acc.id,
                          email_address=acc.email_address)

            if acc.id in self.syncing_accounts:
                self.log.info("sync already started", account_id=account_id)
                return False

            try:
                acc.sync_host = self.process_identifier
                if acc.sync_email:
                    monitor = self.monitor_cls_for[acc.provider](acc)
                    self.email_sync_monitors[acc.id] = monitor
                    monitor.start()

                info = acc.provider_info
                if info.get("contacts", None) and acc.sync_contacts:
                    contact_sync = ContactSync(
                        acc.email_address,
                        acc.verbose_provider,
                        acc.id,
                        acc.namespace.id,
                    )
                    self.contact_sync_monitors[acc.id] = contact_sync
                    contact_sync.start()

                if info.get("events", None) and acc.sync_events:
                    if USE_GOOGLE_PUSH_NOTIFICATIONS and acc.provider == "gmail":
                        event_sync = GoogleEventSync(
                            acc.email_address,
                            acc.verbose_provider,
                            acc.id,
                            acc.namespace.id,
                        )
                    else:
                        event_sync = EventSync(
                            acc.email_address,
                            acc.verbose_provider,
                            acc.id,
                            acc.namespace.id,
                        )
                    self.event_sync_monitors[acc.id] = event_sync
                    event_sync.start()

                acc.sync_started()
                self.syncing_accounts.add(acc.id)
                # TODO (mark): Uncomment this after we've transitioned to from statsd to brubeck
                # statsd_client.gauge('mailsync.sync_hosts_counts.{}'.format(acc.id), 1, delta=True)
                db_session.commit()
                self.log.info("Sync started",
                              account_id=account_id,
                              sync_host=acc.sync_host)
            except Exception:
                self.log.error("Error starting sync",
                               exc_info=True,
                               account_id=account_id)
                return False
        return True