def refresh_flags_impl(self, crispin_client, max_uids):
    crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)
    with session_scope(self.namespace_id) as db_session:
        local_uids = common.local_uids(account_id=self.account_id,
                                       session=db_session,
                                       folder_id=self.folder_id,
                                       limit=max_uids)

    flags = crispin_client.flags(local_uids)
    if (max_uids in self.flags_fetch_results and
            self.flags_fetch_results[max_uids] == (local_uids, flags)):
        # If the flags fetch response is exactly the same as the last one
        # we got, then we don't need to persist any changes.
        log.debug('Unchanged flags refresh response, not persisting changes',
                  max_uids=max_uids)
        return
    log.debug('Changed flags refresh response, persisting changes',
              max_uids=max_uids)

    expunged_uids = set(local_uids).difference(flags.keys())
    common.remove_deleted_uids(self.account_id, self.folder_id,
                               expunged_uids)
    with session_scope(self.namespace_id) as db_session:
        common.update_metadata(self.account_id, self.folder_id,
                               self.folder_role, flags, db_session)
    self.flags_fetch_results[max_uids] = (local_uids, flags)
def get_g_metadata(crispin_client, log, folder_name, uids, syncmanager_lock):
    assert folder_name == crispin_client.selected_folder_name, \
        "crispin selected folder isn't as expected"
    account_id = crispin_client.account_id
    remote_g_metadata = None
    update_uid_count = 0

    with session_scope(ignore_soft_deletes=False) as db_session:
        saved_folder_info = account.get_folder_info(account_id, db_session,
                                                    folder_name)
        saved_highestmodseq = or_none(saved_folder_info,
                                      lambda i: i.highestmodseq)
    if saved_highestmodseq is not None:
        # If there's no cached validity we probably haven't run before.
        remote_g_metadata, update_uid_count = retrieve_saved_g_metadata(
            crispin_client, log, folder_name, uids, saved_highestmodseq,
            syncmanager_lock)

    if remote_g_metadata is None:
        remote_g_metadata = crispin_client.g_metadata(
            crispin_client.all_uids())
        set_cache(remote_g_metadata_cache_file(account_id, folder_name),
                  remote_g_metadata)
        # Save highestmodseq that corresponds to the saved g_metadata.
        with session_scope(ignore_soft_deletes=False) as db_session:
            account.update_folder_info(account_id, db_session, folder_name,
                                       crispin_client.selected_uidvalidity,
                                       crispin_client.selected_highestmodseq)
            db_session.commit()

    return remote_g_metadata, update_uid_count
def _run_impl(self):
    self.log.info('Starting LabelRenameHandler',
                  label_name=self.label_name)

    with connection_pool(self.account_id).get() as crispin_client:
        folders = []
        with session_scope(self.account_id) as db_session:
            results = db_session.query(Folder).filter(
                Folder.account_id == self.account_id)
            # Capture both id and name up front; the session is expunged
            # below, so folder attributes can't be lazily loaded inside the
            # IMAP loop.
            folders = [(folder.id, folder.name) for folder in results]
            db_session.expunge_all()

        for folder_id, folder_name in folders:
            crispin_client.select_folder(folder_name, uidvalidity_cb)

            found_uids = crispin_client.search_uids(
                ['X-GM-LABELS', utf7_encode(self.label_name)])
            flags = crispin_client.flags(found_uids)

            self.log.info('Running metadata update for folder',
                          folder_name=folder_name)
            with session_scope(self.account_id) as db_session:
                common.update_metadata(self.account_id, folder_id, flags,
                                       db_session)
                db_session.commit()
def remote_update_folder(crispin_client, account_id, category_id, old_name,
                         new_name):
    with session_scope(account_id) as db_session:
        account = db_session.query(Account).get(account_id)
        account_provider = account.provider

    if account_provider not in ['gmail', 'eas']:
        new_display_name = imap_folder_path(
            new_name, separator=crispin_client.folder_separator,
            prefix=crispin_client.folder_prefix)
    else:
        new_display_name = new_name

    crispin_client.conn.rename_folder(old_name, new_display_name)

    # TODO @karim: Make the main sync loop detect folder renames
    # more accurately, and get rid of this.
    if new_display_name != old_name:
        with session_scope(account_id) as db_session:
            category = db_session.query(Category).get(category_id)
            category.display_name = new_display_name

            for folder in category.folders:
                if folder.name == old_name:
                    folder.name = new_display_name
def _run(self):
    with self.semaphore:
        log = logger.new(
            record_id=self.record_id, action_log_id=self.action_log_id,
            action=self.action_name, account_id=self.account_id,
            extra_args=self.extra_args)

        for _ in range(ACTION_MAX_NR_OF_RETRIES):
            try:
                before_func = datetime.utcnow()
                if self.extra_args:
                    self.func(self.account_id, self.record_id,
                              self.extra_args)
                else:
                    self.func(self.account_id, self.record_id)
                after_func = datetime.utcnow()

                with session_scope(self.account_id) as db_session:
                    action_log_entry = db_session.query(ActionLog).get(
                        self.action_log_id)
                    action_log_entry.status = 'successful'
                    db_session.commit()
                    latency = round((datetime.utcnow() -
                                     action_log_entry.created_at).
                                    total_seconds(), 2)
                    func_latency = round((after_func - before_func).
                                         total_seconds(), 2)
                    log.info('syncback action completed',
                             action_id=self.action_log_id,
                             latency=latency,
                             process=self.parent_service.process_number,
                             func_latency=func_latency)
                    self._log_to_statsd(action_log_entry.status, latency)
                    return
            except Exception:
                log_uncaught_errors(log, account_id=self.account_id,
                                    provider=self.provider)
                with session_scope(self.account_id) as db_session:
                    action_log_entry = db_session.query(ActionLog).get(
                        self.action_log_id)
                    action_log_entry.retries += 1
                    if (action_log_entry.retries ==
                            ACTION_MAX_NR_OF_RETRIES):
                        log.critical('Max retries reached, giving up.',
                                     exc_info=True)
                        action_log_entry.status = 'failed'
                        self._log_to_statsd(action_log_entry.status)
                        db_session.commit()
                        return
                    db_session.commit()

            # Wait before retrying
            log.info("Syncback worker retrying action after sleeping",
                     duration=self.retry_interval)

            # TODO(T6974): We might want to do some kind of exponential
            # backoff with jitter to avoid the thundering herd problem if a
            # provider suddenly starts having issues for a short period of
            # time.
            gevent.sleep(self.retry_interval)
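# The TODO(T6974) above mentions exponential backoff with jitter in place of
# the fixed retry_interval sleep. A minimal sketch of such a helper, assuming
# the retry loop keeps its current shape; the name `backoff_with_jitter` and
# the 300-second cap are hypothetical, not part of the existing codebase:
import random


def backoff_with_jitter(attempt, base_interval, max_interval=300):
    """Return a randomized sleep duration that grows exponentially with
    `attempt` ("full jitter"), so many failing workers don't all retry at
    the same moment."""
    bound = min(max_interval, base_interval * (2 ** attempt))
    return random.uniform(0, bound)


# Inside the retry loop, the fixed sleep could then become something like
#     gevent.sleep(backoff_with_jitter(attempt, self.retry_interval))
# where `attempt` is the current iteration index of the retry loop.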
def test_password_auth(email, password):
    with session_scope() as db_session:
        create_account(db_session, email, password)

    start_time = time.time()

    # Check that the account exists
    client = None
    while time.time() - start_time < TEST_MAX_DURATION_SECS:
        client = APIClient.from_email(email)[0]
        if client is not None:
            break
        time.sleep(TEST_GRANULARITY_CHECK_SECS)

    if client is None:
        assert False, "Account namespace should have been created"

    # Now, compute how much time it takes to start syncing the account
    start_time = time.time()
    got_messages = False
    while time.time() - start_time < TEST_MAX_DURATION_SECS:
        messages = client.get_messages()
        if len(messages) != 0:
            got_messages = True
            break
        time.sleep(TEST_GRANULARITY_CHECK_SECS)
    assert got_messages, "Messages should have been found"

    print "test_password_auth %s %f" % (email, time.time() - start_time)

    # remove the account
    with session_scope() as db_session:
        # remove_account(db_session, email)
        pass
def _run(self): """ Index into Elasticsearch the threads, messages of all namespaces. """ with session_scope() as db_session: pointer = db_session.query(SearchIndexCursor).first() self.transaction_pointer = pointer.transaction_id if pointer else 0 self.log.info('Starting search-index service', transaction_pointer=self.transaction_pointer) while True: with session_scope() as db_session: transactions = db_session.query(Transaction). \ filter(Transaction.id > self.transaction_pointer, or_(Transaction.object_type == 'message', Transaction.object_type == 'thread')). \ order_by(asc(Transaction.id)). \ limit(self.chunk_size). \ options(joinedload(Transaction.namespace)).all() # TODO[k]: We ideally want to index chunk_size at a time. # This currently indexes <= chunk_size, and it varies each # time. if transactions: self.index(transactions, db_session) new_pointer = transactions[-1].id self.update_pointer(new_pointer, db_session) else: sleep(self.poll_interval) db_session.commit()
def _new_connection(self):
    from inbox.auth.base import handler_from_provider

    # Ensure that connections are initialized serially, so as not to use
    # many db sessions on startup.
    with self._new_conn_lock:
        auth_handler = handler_from_provider(self.provider_name)

        for retry_count in range(MAX_TRANSIENT_ERRORS):
            try:
                conn = auth_handler.connect_account(self.email_address,
                                                    self.credential,
                                                    self.imap_endpoint,
                                                    self.account_id)

                # If we can connect the account, then we can set the state
                # to 'running' if it wasn't already.
                if self.sync_state != 'running':
                    with session_scope() as db_session:
                        query = db_session.query(ImapAccount)
                        account = query.get(self.account_id)
                        self.sync_state = account.sync_state = 'running'

                return self.client_cls(self.account_id, self.provider_info,
                                       self.email_address, conn,
                                       readonly=self.readonly)

            except ConnectionError as e:
                if isinstance(e, TransientConnectionError):
                    return None
                else:
                    logger.error('Error connecting',
                                 account_id=self.account_id)
                    with session_scope() as db_session:
                        query = db_session.query(ImapAccount)
                        account = query.get(self.account_id)
                        account.sync_state = 'connerror'
                        account.update_sync_error(str(e))
                    return None

            except ValidationError as e:
                # If we failed to validate, but the account is oauth2, we
                # may just need to refresh the access token. Try this one
                # time.
                if (self.provider_info['auth'] == 'oauth2' and
                        retry_count == 0):
                    with session_scope() as db_session:
                        query = db_session.query(ImapAccount)
                        account = query.get(self.account_id)
                        self.credential = token_manager.get_token(
                            account, force_refresh=True)
                else:
                    logger.error('Error validating',
                                 account_id=self.account_id,
                                 logstash_tag='mark_invalid')
                    with session_scope() as db_session:
                        query = db_session.query(ImapAccount)
                        account = query.get(self.account_id)
                        account.mark_invalid()
                        account.update_sync_error(str(e))
                    raise
def syncback_worker(semaphore, action, action_log_id, record_id, account_id,
                    syncback_service, retry_interval=30, extra_args=None):
    func = ACTION_FUNCTION_MAP[action]

    with semaphore:
        log = logger.new(record_id=record_id, action_log_id=action_log_id,
                         action=func, account_id=account_id,
                         extra_args=extra_args)
        # Not ignoring soft-deleted objects here because if you, say,
        # delete a draft, we still need to access the object to delete it
        # on the remote.
        try:
            with session_scope(ignore_soft_deletes=False) as db_session:
                if extra_args:
                    func(account_id, record_id, db_session, extra_args)
                else:
                    func(account_id, record_id, db_session)
                action_log_entry = db_session.query(ActionLog).get(
                    action_log_id)
                action_log_entry.status = 'successful'
                db_session.commit()
                latency = round((datetime.utcnow() -
                                 action_log_entry.created_at).
                                total_seconds(), 2)
                log.info('syncback action completed',
                         action_id=action_log_id,
                         latency=latency)
                syncback_service.remove_from_schedule(action_log_id)
        except Exception as e:
            # To reduce error-reporting noise, don't ship to Sentry
            # if not actionable.
            if isinstance(e, ProviderSpecificException):
                log.warning('Uncaught error', exc_info=True)
            else:
                log_uncaught_errors(log, account_id=account_id)

            with session_scope() as db_session:
                action_log_entry = db_session.query(ActionLog).get(
                    action_log_id)
                action_log_entry.retries += 1

                if action_log_entry.retries == ACTION_MAX_NR_OF_RETRIES:
                    log.critical('Max retries reached, giving up.',
                                 action_id=action_log_id,
                                 account_id=account_id, exc_info=True)
                    action_log_entry.status = 'failed'
                db_session.commit()

            # Wait for a bit before retrying
            gevent.sleep(retry_interval)

            # Remove the entry from the scheduled set so that it can be
            # retried or given up on.
            syncback_service.remove_from_schedule(action_log_id)

            # Again, don't raise on exceptions that require
            # provider-specific handling e.g. EAS
            if not isinstance(e, ProviderSpecificException):
                raise
def _run_impl(self):
    try:
        saved_folder_status = self._load_state()
    except IntegrityError:
        # The state insert failed because the folder ID ForeignKey
        # was no longer valid, ie. the folder for this engine was deleted
        # while we were starting up.
        # Exit the sync and let the monitor sort things out.
        log.info(
            "Folder state loading failed due to IntegrityError",
            folder_id=self.folder_id,
            account_id=self.account_id,
        )
        raise MailsyncDone()

    # NOTE: The parent ImapSyncMonitor handler could kill us at any
    # time if it receives a shutdown command. The shutdown command is
    # equivalent to ctrl-c.
    while True:
        old_state = self.state
        try:
            self.state = self.state_handlers[old_state]()
            self.heartbeat_status.publish(state=self.state)
        except UidInvalid:
            self.state = self.state + " uidinvalid"
            self.heartbeat_status.publish(state=self.state)
        except FolderMissingError:
            # Folder was deleted by monitor while its sync was running.
            # TODO: Monitor should handle shutting down the folder engine.
            log.info(
                "Folder disappeared. Stopping sync.",
                account_id=self.account_id,
                folder_name=self.folder_name,
                folder_id=self.folder_id,
            )
            raise MailsyncDone()
        except ValidationError as exc:
            log.error(
                "Error authenticating; stopping sync",
                exc_info=True,
                account_id=self.account_id,
                folder_id=self.folder_id,
                logstash_tag="mark_invalid",
            )
            with session_scope(self.namespace_id) as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.mark_invalid()
                account.update_sync_error(str(exc))
            raise MailsyncDone()

        # State handlers are idempotent, so it's okay if we're
        # killed between the end of the handler and the commit.
        if self.state != old_state:
            # Don't need to re-query, will auto refresh on re-associate.
            with session_scope(self.namespace_id) as db_session:
                db_session.add(saved_folder_status)
                saved_folder_status.state = self.state
                db_session.commit()
def imap_initial_sync(crispin_client, log, folder_name, shared_state,
                      local_uids, uid_download_stack, msg_create_fn,
                      spawn_flags_refresh_poller=True):
    # We wrap the block in a try/finally because the greenlets like
    # new_uid_poller need to be killed when this greenlet is interrupted.
    # Initialize them to None so the finally block is safe even if we fail
    # before spawning them.
    new_uid_poller = None
    flags_refresh_poller = None
    try:
        assert crispin_client.selected_folder_name == folder_name

        remote_uids = crispin_client.all_uids()
        log.info(remote_uid_count=len(remote_uids))
        log.info(local_uid_count=len(local_uids))

        with shared_state['syncmanager_lock']:
            log.debug("imap_initial_sync acquired syncmanager_lock")
            with session_scope(ignore_soft_deletes=False) as db_session:
                deleted_uids = remove_deleted_uids(
                    crispin_client.account_id, db_session, log, folder_name,
                    local_uids, remote_uids)

        local_uids = set(local_uids) - deleted_uids

        new_uids = set(remote_uids) - local_uids
        add_uids_to_stack(new_uids, uid_download_stack)

        with session_scope(ignore_soft_deletes=False) as db_session:
            update_uid_counts(db_session, log, crispin_client.account_id,
                              folder_name,
                              remote_uid_count=len(remote_uids),
                              # This is the initial size of our download_queue
                              download_uid_count=len(new_uids),
                              # Flags are updated in imap_check_flags() and
                              # update_uid_count is set there
                              delete_uid_count=len(deleted_uids))

        new_uid_poller = spawn(check_new_uids, crispin_client.account_id,
                               folder_name, log, uid_download_stack,
                               shared_state['poll_frequency'],
                               shared_state['syncmanager_lock'])

        if spawn_flags_refresh_poller:
            flags_refresh_poller = spawn(imap_check_flags,
                                         crispin_client.account_id,
                                         folder_name, log,
                                         shared_state['poll_frequency'],
                                         shared_state['syncmanager_lock'],
                                         shared_state['refresh_flags_max'])

        download_queued_uids(crispin_client, log, folder_name,
                             uid_download_stack, len(local_uids),
                             len(remote_uids),
                             shared_state['syncmanager_lock'],
                             download_and_commit_uids, msg_create_fn)
    finally:
        if new_uid_poller is not None:
            new_uid_poller.kill()

        if flags_refresh_poller is not None:
            flags_refresh_poller.kill()
def sync(self): """Query a remote provider for updates and persist them to the database. This function runs every `self.poll_frequency`. """ self.log.info('syncing contacts') # Grab timestamp so next sync gets deltas from now sync_timestamp = datetime.utcnow() with session_scope() as db_session: account = db_session.query(Account).get(self.account_id) last_sync_dt = account.last_synced_contacts all_contacts = self.provider.get_items(sync_from_dt=last_sync_dt) # Do a batch insertion of every 100 contact objects change_counter = Counter() for new_contact in all_contacts: new_contact.namespace = account.namespace assert new_contact.uid is not None, \ 'Got remote item with null uid' assert isinstance(new_contact.uid, basestring) try: existing_contact = db_session.query(Contact).filter( Contact.namespace == account.namespace, Contact.provider_name == self.provider.PROVIDER_NAME, Contact.uid == new_contact.uid).one() # If the remote item was deleted, purge the corresponding # database entries. if new_contact.deleted: db_session.delete(existing_contact) change_counter['deleted'] += 1 else: # Update fields in our old item with the new. # Don't save the newly returned item to the database. existing_contact.merge_from(new_contact) change_counter['updated'] += 1 except NoResultFound: # We didn't know about this before! Add this item. db_session.add(new_contact) change_counter['added'] += 1 # Flush every 100 objects for perf if sum(change_counter.values()) % 100: db_session.flush() # Update last sync with session_scope() as db_session: account = db_session.query(Account).get(self.account_id) account.last_synced_contacts = sync_timestamp self.log.info('synced contacts', added=change_counter['added'], updated=change_counter['updated'], deleted=change_counter['deleted'])
def highestmodseq_update(crispin_client, log, folder_name, last_highestmodseq,
                         highestmodseq_fn, syncmanager_lock):
    account_id = crispin_client.account_id
    new_highestmodseq = crispin_client.selected_highestmodseq
    new_uidvalidity = crispin_client.selected_uidvalidity
    log.info('starting highestmodseq update',
             current_highestmodseq=new_highestmodseq)
    changed_uids = crispin_client.new_and_updated_uids(last_highestmodseq)
    remote_uids = crispin_client.all_uids()

    local_uids = None
    if changed_uids:
        with session_scope(ignore_soft_deletes=False) as db_session:
            local_uids = account.all_uids(account_id, db_session,
                                          folder_name)

        new, updated = new_or_updated(changed_uids, local_uids)
        log.info(new_uid_count=len(new), updated_uid_count=len(updated))

        local_uids += new
        with syncmanager_lock:
            log.debug("highestmodseq_update acquired syncmanager_lock")
            with session_scope(ignore_soft_deletes=False) as db_session:
                deleted_uids = remove_deleted_uids(account_id, db_session,
                                                   log, folder_name,
                                                   local_uids, remote_uids)

        local_uids = set(local_uids) - deleted_uids
        update_metadata(crispin_client, log, folder_name, updated,
                        syncmanager_lock)

        with session_scope(ignore_soft_deletes=False) as db_session:
            update_uid_counts(db_session, log, account_id, folder_name,
                              remote_uid_count=len(remote_uids),
                              download_uid_count=len(new),
                              update_uid_count=len(updated),
                              delete_uid_count=len(deleted_uids))

        highestmodseq_fn(crispin_client, log, folder_name, new, updated,
                         syncmanager_lock)
    else:
        log.info("No new or updated messages")

    with session_scope(ignore_soft_deletes=False) as db_session:
        with syncmanager_lock:
            log.debug("highestmodseq_update acquired syncmanager_lock")
            if local_uids is None:
                local_uids = account.all_uids(
                    account_id, db_session, folder_name)
            deleted_uids = remove_deleted_uids(crispin_client.account_id,
                                               db_session, log, folder_name,
                                               local_uids, remote_uids)
        update_uid_counts(db_session, log, account_id, folder_name,
                          remote_uid_count=len(remote_uids),
                          delete_uid_count=len(deleted_uids))
        account.update_folder_info(account_id, db_session, folder_name,
                                   new_uidvalidity, new_highestmodseq)
        db_session.commit()
def execute_with_lock(self):
    self.log = logger.new(
        record_ids=self.record_ids, action_log_ids=self.action_log_ids,
        action=self.action_name, account_id=self.account_id,
        extra_args=self.extra_args)

    # Double-check that the action is still pending.
    # Although the task queue is populated based on pending actions, it's
    # possible that the processing of one action involved marking other
    # actions as failed.
    records_to_process, action_ids_to_process = \
        self._get_records_and_actions_to_process()
    if len(action_ids_to_process) == 0:
        return

    for attempt in range(ACTION_MAX_NR_OF_RETRIES):
        self.log.debug("executing action", attempt=attempt)
        try:
            before, after = self._execute_timed_action(records_to_process)

            with session_scope(self.account_id) as db_session:
                action_log_entries = db_session.query(ActionLog). \
                    filter(ActionLog.id.in_(action_ids_to_process))

                for action_log_entry in action_log_entries:
                    self._mark_action_as_successful(action_log_entry,
                                                    before, after,
                                                    db_session)
                return
        except:
            log_uncaught_errors(self.log, account_id=self.account_id,
                                provider=self.provider)
            with session_scope(self.account_id) as db_session:
                action_log_entries = db_session.query(ActionLog). \
                    filter(ActionLog.id.in_(action_ids_to_process))

                marked_as_failed = False
                for action_log_entry in action_log_entries:
                    action_log_entry.retries += 1
                    if action_log_entry.retries == ACTION_MAX_NR_OF_RETRIES:
                        marked_as_failed = True
                        self._mark_action_as_failed(action_log_entry,
                                                    db_session)
                    # If we've merged SyncbackTasks then their corresponding
                    # actions should all fail at the same time.
                    assert (not marked_as_failed or
                            action_log_entry.retries ==
                            ACTION_MAX_NR_OF_RETRIES)
                db_session.commit()

                if marked_as_failed:
                    return

        # Wait before retrying
        self.log.info("Syncback task retrying action after sleeping",
                      duration=self.retry_interval)

        # TODO(T6974): We might want to do some kind of exponential
        # backoff with jitter to avoid the thundering herd problem if a
        # provider suddenly starts having issues for a short period of
        # time.
        gevent.sleep(self.retry_interval)
def initial_sync_impl(self, crispin_client):
    # We wrap the block in a try/finally because the greenlets like
    # change_poller need to be killed when this greenlet is interrupted
    change_poller = None
    try:
        remote_uids = sorted(crispin_client.all_uids(), key=int)
        with self.syncmanager_lock:
            with session_scope(self.namespace_id) as db_session:
                local_uids = common.local_uids(self.account_id, db_session,
                                               self.folder_id)
                common.remove_deleted_uids(
                    self.account_id, self.folder_id,
                    set(local_uids) - set(remote_uids))

        unknown_uids = set(remote_uids) - local_uids

        with session_scope(self.namespace_id) as db_session:
            self.update_uid_counts(
                db_session, remote_uid_count=len(remote_uids),
                download_uid_count=len(unknown_uids))

        change_poller = spawn(self.poll_for_changes)
        bind_context(change_poller, 'changepoller', self.account_id,
                     self.folder_id)

        if self.is_all_mail(crispin_client):
            # Prioritize UIDs for messages in the inbox folder.
            if len(remote_uids) < 1e6:
                inbox_uids = set(
                    crispin_client.search_uids(['X-GM-LABELS', 'inbox']))
            else:
                # The search above is really slow (times out) on really
                # large mailboxes, so bound the search to messages within
                # the past month in order to get anywhere.
                since = datetime.utcnow() - timedelta(days=30)
                inbox_uids = set(crispin_client.search_uids([
                    'X-GM-LABELS', 'inbox', 'SINCE', since]))

            uids_to_download = (sorted(unknown_uids - inbox_uids) +
                                sorted(unknown_uids & inbox_uids))
        else:
            uids_to_download = sorted(unknown_uids)

        for uids in chunk(reversed(uids_to_download), 1024):
            g_metadata = crispin_client.g_metadata(uids)
            # UIDs might have been expunged since sync started, in which
            # case the g_metadata call above will return nothing.
            # They may also have been preemptively downloaded by thread
            # expansion. We can omit such UIDs.
            uids = [u for u in uids if u in g_metadata and
                    u not in self.saved_uids]
            self.batch_download_uids(crispin_client, uids, g_metadata)
    finally:
        if change_poller is not None:
            # schedule change_poller to die
            kill(change_poller)
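# Ordering note for initial_sync_impl above: `uids_to_download` is consumed
# via `reversed(...)`, so the inbox UIDs (appended last, in ascending order)
# are downloaded first, newest first, followed by the remaining All Mail
# UIDs, also newest first.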
def _new_connection(self):
    from inbox.auth import handler_from_provider

    # Ensure that connections are initialized serially, so as not to use
    # many db sessions on startup.
    with self._new_conn_lock:
        auth_handler = handler_from_provider(self.provider_name)

        for retry_count in range(MAX_TRANSIENT_ERRORS):
            try:
                conn = auth_handler.connect_account(self.provider_name,
                                                    self.email_address,
                                                    self.credential)

                # If we can connect the account, then we can set the state
                # to 'running' if it wasn't already.
                if self.sync_state != 'running':
                    with session_scope() as db_session:
                        query = db_session.query(ImapAccount)
                        account = query.get(self.account_id)
                        self.sync_state = account.sync_state = 'running'

                return new_crispin(self.account_id, self.email_address,
                                   self.provider_name, conn, self.readonly)

            except ConnectionError as e:
                if isinstance(e, TransientConnectionError):
                    return None
                else:
                    logger.error('Error connecting',
                                 account_id=self.account_id)
                    with session_scope() as db_session:
                        query = db_session.query(ImapAccount)
                        account = query.get(self.account_id)
                        account.sync_state = 'connerror'
                    return None

            except ValidationError as e:
                # If we failed to validate, but the account is oauth2, we
                # may just need to refresh the access token. Try this one
                # time.
                if (self.provider_info['auth'] == 'oauth2' and
                        retry_count == 0):
                    with session_scope() as db_session:
                        query = db_session.query(ImapAccount)
                        account = query.get(self.account_id)
                        self.credential = account.renew_access_token()
                else:
                    logger.error('Error validating',
                                 account_id=self.account_id)
                    with session_scope() as db_session:
                        query = db_session.query(ImapAccount)
                        account = query.get(self.account_id)
                        account.sync_state = 'invalid'
                    raise
def _run_impl(self):
    old_state = self.state
    try:
        self.state = self.state_handlers[old_state]()
        self.heartbeat_status.publish(state=self.state)
    except UidInvalid:
        self.state = self.state + ' uidinvalid'
        self.uidinvalid_count += 1
        self.heartbeat_status.publish(state=self.state)

        # Check that we're not stuck in an endless uidinvalidity resync
        # loop.
        if self.uidinvalid_count > MAX_UIDINVALID_RESYNCS:
            log.error('Resynced more than MAX_UIDINVALID_RESYNCS in a'
                      ' row. Stopping sync.')
            with session_scope(self.namespace_id) as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.disable_sync('Detected endless uidvalidity '
                                     'resync loop')
                account.sync_state = 'stopped'
                db_session.commit()
            raise MailsyncDone()
    except FolderMissingError:
        # Folder was deleted by monitor while its sync was running.
        # TODO: Monitor should handle shutting down the folder engine.
        log.info('Folder disappeared. Stopping sync.',
                 account_id=self.account_id,
                 folder_name=self.folder_name,
                 folder_id=self.folder_id)
        raise MailsyncDone()
    except ValidationError as exc:
        log.error('Error authenticating; stopping sync', exc_info=True,
                  account_id=self.account_id, folder_id=self.folder_id,
                  logstash_tag='mark_invalid')
        with session_scope(self.namespace_id) as db_session:
            account = db_session.query(Account).get(self.account_id)
            account.mark_invalid()
            account.update_sync_error(str(exc))
        raise MailsyncDone()

    # State handlers are idempotent, so it's okay if we're
    # killed between the end of the handler and the commit.
    if self.state != old_state:
        def update(status):
            status.state = self.state
        self.update_folder_sync_status(update)

    if self.state == old_state and self.state in ['initial', 'poll']:
        # We've been through a normal state transition without raising any
        # error. It's safe to reset the uidvalidity counter.
        self.uidinvalid_count = 0
def refresh_flags_impl(self, crispin_client, max_uids):
    crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)
    with session_scope(self.namespace_id) as db_session:
        local_uids = common.local_uids(
            account_id=self.account_id, session=db_session,
            folder_id=self.folder_id, limit=max_uids)

    flags = crispin_client.flags(local_uids)
    expunged_uids = set(local_uids).difference(flags.keys())
    common.remove_deleted_uids(self.account_id, self.folder_id,
                               expunged_uids)
    with session_scope(self.namespace_id) as db_session:
        common.update_metadata(self.account_id, self.folder_id, flags,
                               db_session)
def check_new_uids(account_id, provider, folder_name, log,
                   uid_download_stack, poll_frequency, syncmanager_lock):
    """
    Check for new UIDs and add them to the download stack.

    We do this by comparing local UID lists to remote UID lists,
    maintaining the invariant that (stack uids) + (local uids) ==
    (remote uids).

    We also remove local messages that have disappeared from the remote,
    since it's totally probable that users will be archiving mail as the
    initial sync goes on.

    We grab a new IMAP connection from the pool for this to isolate its
    actions from whatever the main greenlet may be doing.

    Runs until killed. (Intended to be run in a greenlet.)

    """
    log.info("Spinning up new UID-check poller for {}".format(folder_name))

    with connection_pool(account_id).get() as crispin_client:
        with session_scope() as db_session:
            crispin_client.select_folder(
                folder_name,
                uidvalidity_cb(db_session, crispin_client.account_id))

        while True:
            remote_uids = set(crispin_client.all_uids())
            # We lock this section to make sure no messages are being
            # created while we make sure the queue is in a good state.
            with syncmanager_lock:
                log.debug("check_new_uids acquired syncmanager_lock")
                with session_scope(ignore_soft_deletes=False) as db_session:
                    local_uids = set(account.all_uids(account_id, db_session,
                                                      folder_name))
                    stack_uids = set(uid_download_stack.queue)
                    local_with_pending_uids = local_uids | stack_uids
                    deleted_uids = remove_deleted_uids(
                        account_id, db_session, log, folder_name,
                        local_uids, remote_uids)
                    log.info("Removed {} deleted UIDs from {}".format(
                        len(deleted_uids), folder_name))

                # filter out messages that have disappeared on the
                # remote side
                new_uid_download_stack = {u for u in uid_download_stack.queue
                                          if u in remote_uids}

                # add in any new uids from the remote
                for uid in remote_uids:
                    if uid not in local_with_pending_uids:
                        log.debug("adding new message {} to download queue"
                                  .format(uid))
                        new_uid_download_stack.add(uid)

                uid_download_stack.queue = sorted(new_uid_download_stack,
                                                  key=int)

            sleep(poll_frequency)
def initial_sync_impl(self, crispin_client):
    # We wrap the block in a try/finally because the change_poller greenlet
    # needs to be killed when this greenlet is interrupted
    change_poller = None
    try:
        assert crispin_client.selected_folder_name == self.folder_name
        remote_uids = crispin_client.all_uids()
        with self.syncmanager_lock:
            with session_scope(self.namespace_id) as db_session:
                local_uids = common.local_uids(self.account_id, db_session,
                                               self.folder_id)
                common.remove_deleted_uids(
                    self.account_id, self.folder_id,
                    set(local_uids).difference(remote_uids), db_session)

        new_uids = set(remote_uids).difference(local_uids)
        with session_scope(self.namespace_id) as db_session:
            account = db_session.query(Account).get(self.account_id)
            throttled = account.throttled
            self.update_uid_counts(
                db_session,
                remote_uid_count=len(remote_uids),
                # This is the initial size of our download_queue
                download_uid_count=len(new_uids))

        change_poller = spawn(self.poll_for_changes)
        bind_context(change_poller, 'changepoller', self.account_id,
                     self.folder_id)

        uids = sorted(new_uids, reverse=True)
        count = 0
        for uid in uids:
            # The speedup from batching appears to be less clear for
            # non-Gmail accounts, so for now just download one-at-a-time.
            self.download_and_commit_uids(crispin_client, [uid])
            self.heartbeat_status.publish()
            count += 1
            if throttled and count >= THROTTLE_COUNT:
                # Throttled accounts' folders sync at a rate of
                # 1 message/minute, after the first approx. THROTTLE_COUNT
                # messages per folder are synced.
                # Note this is an approx. limit since we use the #(uids),
                # not the #(messages).
                sleep(THROTTLE_WAIT)
    finally:
        if change_poller is not None:
            # schedule change_poller to die
            kill(change_poller)
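# Rough effect of the throttling above: a throttled folder downloads its
# first THROTTLE_COUNT messages at full speed and then sleeps THROTTLE_WAIT
# seconds per message. With hypothetical values of THROTTLE_COUNT = 200 and
# THROTTLE_WAIT = 60 (consistent with the "1 message/minute" comment, but not
# confirmed here), that works out to roughly 60 messages per hour after the
# initial burst.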
def condstore_refresh_flags(self, crispin_client):
    new_highestmodseq = crispin_client.conn.folder_status(
        self.folder_name, ['HIGHESTMODSEQ'])['HIGHESTMODSEQ']
    # Ensure that we have an initial highestmodseq value stored before we
    # begin polling for changes.
    if self.highestmodseq is None:
        self.highestmodseq = new_highestmodseq

    if new_highestmodseq == self.highestmodseq:
        # Don't need to do anything if the highestmodseq hasn't
        # changed.
        return
    elif new_highestmodseq < self.highestmodseq:
        # This should really never happen, but if it does, handle it.
        log.warning('got server highestmodseq less than saved '
                    'highestmodseq',
                    new_highestmodseq=new_highestmodseq,
                    saved_highestmodseq=self.highestmodseq)
        return

    # Highestmodseq has changed, update accordingly.
    crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)
    changed_flags = crispin_client.condstore_changed_flags(
        self.highestmodseq)
    remote_uids = crispin_client.all_uids()
    with session_scope(self.namespace_id) as db_session:
        common.update_metadata(self.account_id, self.folder_id,
                               changed_flags, db_session)
        local_uids = common.local_uids(self.account_id, db_session,
                                       self.folder_id)
        expunged_uids = set(local_uids).difference(remote_uids)

    if expunged_uids:
        # If new UIDs have appeared since we last checked in
        # get_new_uids, save them first. We want to always have the
        # latest UIDs before expunging anything, in order to properly
        # capture draft revisions.
        with session_scope(self.namespace_id) as db_session:
            lastseenuid = common.lastseenuid(self.account_id, db_session,
                                             self.folder_id)
        if remote_uids and lastseenuid < max(remote_uids):
            log.info('Downloading new UIDs before expunging')
            self.get_new_uids(crispin_client)
        with session_scope(self.namespace_id) as db_session:
            common.remove_deleted_uids(self.account_id, self.folder_id,
                                       expunged_uids, db_session)
            db_session.commit()
    self.highestmodseq = new_highestmodseq
def download_and_commit_uids(self, crispin_client, uids):
    start = datetime.utcnow()
    raw_messages = crispin_client.uids(uids)
    if not raw_messages:
        return

    new_uids = set()
    with self.syncmanager_lock:
        with session_scope() as db_session:
            account = Account.get(self.account_id, db_session)
            folder = Folder.get(self.folder_id, db_session)
            raw_messages = self.__deduplicate_message_object_creation(
                db_session, raw_messages, account)
            if not raw_messages:
                return 0

            for msg in raw_messages:
                uid = self.create_message(db_session, account, folder, msg)
                if uid is not None:
                    db_session.add(uid)
                    db_session.commit()
                    new_uids.add(uid)

    log.info('Committed new UIDs',
             new_committed_message_count=len(new_uids))
    # If we downloaded uids, record message velocity (#uid / latency)
    if self.state == "initial" and len(new_uids):
        self._report_message_velocity(datetime.utcnow() - start,
                                      len(new_uids))
    if self.is_first_message:
        self._report_first_message()
        self.is_first_message = False

    self.saved_uids.update(new_uids)
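# Design note (not from the original source): committing after each created
# ImapUid, rather than once per batch, presumably keeps progress durable if
# the sync greenlet is killed mid-download, at the cost of extra commits per
# message.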
def resync_uids_impl(self):
    # First, let's check whether the UIDVALIDITY change was spurious; if
    # it was, just discard it and go on.
    with self.conn_pool.get() as crispin_client:
        crispin_client.select_folder(self.folder_name, lambda *args: True)
        remote_uidvalidity = crispin_client.selected_uidvalidity
        remote_uidnext = crispin_client.selected_uidnext
        if remote_uidvalidity <= self.uidvalidity:
            log.debug('UIDVALIDITY unchanged')
            return

    # Otherwise, if the UIDVALIDITY really has changed, discard all saved
    # UIDs for the folder, mark associated messages for garbage-collection,
    # and return to the 'initial' state to resync.
    # This will cause messages and threads to be deleted and recreated, but
    # uidinvalidity is sufficiently rare that this tradeoff is acceptable.
    with session_scope(self.namespace_id) as db_session:
        invalid_uids = {
            uid for uid, in db_session.query(ImapUid.msg_uid).
            filter_by(account_id=self.account_id,
                      folder_id=self.folder_id)
        }
        common.remove_deleted_uids(self.account_id, self.folder_id,
                                   invalid_uids, db_session)
    self.uidvalidity = remote_uidvalidity
    self.highestmodseq = None
    self.uidnext = remote_uidnext
def get_new_uids(self, crispin_client):
    try:
        remote_uidnext = crispin_client.conn.folder_status(
            self.folder_name, ['UIDNEXT']).get('UIDNEXT')
    except ValueError:
        # Work around issue where ValueError is raised on parsing STATUS
        # response.
        log.warning('Error getting UIDNEXT', exc_info=True)
        remote_uidnext = None
    except imaplib.IMAP4.error as e:
        if '[NONEXISTENT]' in e.message:
            raise FolderMissingError()
        else:
            raise e
    if remote_uidnext is not None and remote_uidnext == self.uidnext:
        return
    log.info('UIDNEXT changed, checking for new UIDs',
             remote_uidnext=remote_uidnext, saved_uidnext=self.uidnext)

    crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)
    with session_scope(self.namespace_id) as db_session:
        lastseenuid = common.lastseenuid(self.account_id, db_session,
                                         self.folder_id)
    latest_uids = crispin_client.conn.fetch(
        '{}:*'.format(lastseenuid + 1), ['UID']).keys()
    new_uids = set(latest_uids) - {lastseenuid}
    if new_uids:
        for uid in sorted(new_uids):
            self.download_and_commit_uids(crispin_client, [uid])
    self.uidnext = remote_uidnext
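# Note on the fetch('<n>:*') call above: per the IMAP spec, a UID range whose
# upper bound is '*' always matches at least the message with the highest UID
# in the mailbox, even if that UID is lower than n. The result can therefore
# include lastseenuid itself, which is why it is subtracted back out of
# `latest_uids`.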
def initial_sync(self):
    log.bind(state='initial')
    log.info('starting initial sync')

    if self.is_first_sync:
        self._report_initial_sync_start()
        self.is_first_sync = False

    with self.conn_pool.get() as crispin_client:
        crispin_client.select_folder(self.folder_name, uidvalidity_cb)
        # Ensure we have an ImapFolderInfo row created prior to sync start.
        with session_scope(self.namespace_id) as db_session:
            try:
                db_session.query(ImapFolderInfo). \
                    filter(ImapFolderInfo.account_id == self.account_id,
                           ImapFolderInfo.folder_id == self.folder_id). \
                    one()
            except NoResultFound:
                imapfolderinfo = ImapFolderInfo(
                    account_id=self.account_id, folder_id=self.folder_id,
                    uidvalidity=crispin_client.selected_uidvalidity,
                    uidnext=crispin_client.selected_uidnext)
                db_session.add(imapfolderinfo)
            db_session.commit()

        self.initial_sync_impl(crispin_client)

    if self.is_initial_sync:
        self._report_initial_sync_end()
        self.is_initial_sync = False

    return 'poll'
def upgrade():
    from inbox.ignition import main_engine
    from inbox.models.session import session_scope
    from sqlalchemy.ext.declarative import declarative_base
    from sqlalchemy.orm.exc import NoResultFound

    engine = main_engine(pool_size=1, max_overflow=0)
    # Reflect the schema before checking for the EAS table, so the check
    # actually has metadata to look at.
    Base = declarative_base()
    Base.metadata.reflect(engine)

    if 'easfoldersyncstatus' in Base.metadata.tables:
        from inbox.models.backends.eas import EASFolderSyncStatus
        from inbox.models import Folder
        from inbox.util.eas.constants import SKIP_FOLDERS

        with session_scope(versioned=False, ignore_soft_deletes=False) as \
                db_session:
            statuses = db_session.query(EASFolderSyncStatus).filter(
                EASFolderSyncStatus.eas_folder_type.in_(SKIP_FOLDERS)).all()
            for s in statuses:
                db_session.delete(s)
                db_session.delete(s.folder)

            try:
                for status in db_session.query(EASFolderSyncStatus) \
                        .join(Folder).filter(
                            Folder.name == 'RecipientInfo').all():
                    db_session.delete(status)
                    db_session.delete(status.folder)
            except NoResultFound:
                pass

            db_session.commit()
def _set_account_info(self):
    with session_scope() as db_session:
        account = db_session.query(Account).get(self.account_id)
        self.provider_name = account.provider
        self.email_address = account.email_address
        self.provider_info = provider_info(account.provider,
                                           account.email_address)
        self.sync_state = account.sync_state

        # Refresh token if need be, for OAuthed accounts
        if self.provider_info['auth'] == 'oauth2':
            try:
                self.credential = account.access_token
            except ValidationError:
                logger.error("Error obtaining access token",
                             account_id=self.account_id)
                account.sync_state = 'invalid'
                db_session.commit()
                raise
            except ConnectionError:
                logger.error("Error connecting",
                             account_id=self.account_id)
                account.sync_state = 'connerror'
                db_session.commit()
                raise
        else:
            self.credential = account.password
def new_crispin(account_id, email_address, provider_name, conn,
                readonly=True):
    if provider_name == 'gmail':
        cls = GmailCrispinClient
    else:
        info = provider_info(provider_name, email_address)
        # look up in the provider database to see
        # if the provider supports CONDSTORE
        if "condstore" in info:
            if info["condstore"]:
                cls = CondStoreCrispinClient
            else:
                # condstore=False in provider file
                cls = CrispinClient
        else:
            # no match in provider file, check in the
            # account settings.
            with session_scope() as db_session:
                acc = db_session.query(Account).get(account_id)
                if acc is not None:
                    if getattr(acc, 'supports_condstore', False):
                        cls = CondStoreCrispinClient
                    else:
                        cls = CrispinClient
                else:
                    # No account row found either; fall back to the basic
                    # client rather than leaving cls unbound.
                    cls = CrispinClient

    return cls(account_id, provider_name, email_address, conn,
               readonly=readonly)
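# For reference, `provider_info()` above reads per-provider settings (e.g.
# from inbox/providers.py or a local override). A hypothetical entry showing
# the "condstore" flag that drives the class choice might look roughly like
# this (keys other than "condstore" are illustrative; only "condstore" is
# inspected here):
#
#     "fastmail": {
#         "type": "generic",
#         "condstore": True,
#         "imap": ("imap.fastmail.com", 993),
#         "smtp": ("smtp.fastmail.com", 587),
#     },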
def report_progress(crispin_client, log, folder_name, downloaded_uid_count,
                    num_remaining_messages):
    """ Inform listeners of sync progress. """
    assert crispin_client.selected_folder_name == folder_name

    with session_scope(ignore_soft_deletes=False) as db_session:
        saved_status = db_session.query(ImapFolderSyncStatus).join(Folder) \
            .filter(
                ImapFolderSyncStatus.account_id == crispin_client.account_id,
                Folder.name == folder_name).one()

        previous_count = saved_status.metrics.get(
            'num_downloaded_since_timestamp', 0)

        metrics = dict(num_downloaded_since_timestamp=(
                           previous_count + downloaded_uid_count),
                       download_uid_count=num_remaining_messages,
                       queue_checked_at=datetime.utcnow())

        saved_status.update_metrics(metrics)
        db_session.commit()

    log.info('mailsync progress', folder=folder_name,
             msg_queue_count=num_remaining_messages)
def remote_create_folder(crispin_client, account_id, category_id):
    with session_scope(account_id) as db_session:
        category = db_session.query(Category).get(category_id)
        if category is None:
            return
        display_name = category.display_name
    crispin_client.conn.create_folder(display_name)
def execute_with_lock(self):
    log = logger.new(
        record_id=self.record_id, action_log_id=self.action_log_id,
        action=self.action_name, account_id=self.account_id,
        extra_args=self.extra_args)

    for _ in range(ACTION_MAX_NR_OF_RETRIES):
        try:
            before_func = datetime.utcnow()
            func_args = [self.account_id, self.record_id]
            if self.extra_args:
                func_args.append(self.extra_args)
            if self.uses_crispin_client():
                assert self.crispin_client is not None
                func_args.insert(0, self.crispin_client)
            self.func(*func_args)
            after_func = datetime.utcnow()

            with session_scope(self.account_id) as db_session:
                action_log_entry = db_session.query(ActionLog).get(
                    self.action_log_id)
                action_log_entry.status = 'successful'
                db_session.commit()
                latency = round(
                    (datetime.utcnow() -
                     action_log_entry.created_at).total_seconds(), 2)
                func_latency = round(
                    (after_func - before_func).total_seconds(), 2)
                log.info('syncback action completed',
                         action_id=self.action_log_id,
                         latency=latency,
                         process=self.parent_service().process_number,
                         func_latency=func_latency)
                self._log_to_statsd(action_log_entry.status, latency)
                return
        except Exception:
            log_uncaught_errors(log, account_id=self.account_id,
                                provider=self.provider)
            with session_scope(self.account_id) as db_session:
                action_log_entry = db_session.query(ActionLog).get(
                    self.action_log_id)
                action_log_entry.retries += 1
                if (action_log_entry.retries ==
                        ACTION_MAX_NR_OF_RETRIES):
                    log.critical('Max retries reached, giving up.',
                                 exc_info=True)
                    action_log_entry.status = 'failed'
                    self._log_to_statsd(action_log_entry.status)
                    db_session.commit()
                    return
                db_session.commit()

        # Wait before retrying
        log.info("Syncback task retrying action after sleeping",
                 duration=self.retry_interval)

        # TODO(T6974): We might want to do some kind of exponential
        # backoff with jitter to avoid the thundering herd problem if a
        # provider suddenly starts having issues for a short period of
        # time.
        gevent.sleep(self.retry_interval)
def upgrade():
    from sqlalchemy.ext.declarative import declarative_base

    from inbox.ignition import main_engine
    from inbox.models.session import session_scope

    engine = main_engine(pool_size=1, max_overflow=0)

    op.create_table(
        "genericaccount",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(["id"], [u"imapaccount.id"], ondelete="CASCADE"),
        sa.Column("password_id", sa.Integer(), nullable=True),
        sa.Column("provider", sa.String(length=64), nullable=False),
        sa.PrimaryKeyConstraint("id"),
    )

    Base = declarative_base()
    Base.metadata.reflect(engine)

    class Account(Base):
        __table__ = Base.metadata.tables["account"]

    class ImapAccount(Base):
        __table__ = Base.metadata.tables["imapaccount"]

    class YahooAccount(Base):
        __table__ = Base.metadata.tables["yahooaccount"]

    class AOLAccount(Base):
        __table__ = Base.metadata.tables["aolaccount"]

    class GenericAccount(Base):
        __table__ = Base.metadata.tables["genericaccount"]

    class Secret(Base):
        __table__ = Base.metadata.tables["secret"]

    with session_scope(versioned=False) as db_session:
        for acct in db_session.query(YahooAccount):
            secret = Secret(
                acl_id=0,
                type=0,
                secret=acct.password,
                created_at=datetime.utcnow(),
                updated_at=datetime.utcnow(),
            )
            db_session.add(secret)
            db_session.commit()

            new_acct = GenericAccount(id=acct.id, provider="yahoo")
            new_acct.password_id = secret.id
            db_session.add(new_acct)

        for acct in db_session.query(AOLAccount):
            secret = Secret(
                acl_id=0,
                type=0,
                secret=acct.password,
                created_at=datetime.utcnow(),
                updated_at=datetime.utcnow(),
            )
            db_session.add(secret)
            db_session.commit()

            new_acct = GenericAccount(id=acct.id, provider="aol")
            new_acct.password_id = secret.id
            db_session.add(new_acct)

        db_session.commit()

    # don't cascade the delete
    engine.execute("drop table aolaccount")
    engine.execute("drop table yahooaccount")

    op.drop_column("imapaccount", "imap_host")
def _new_raw_connection(self):
    """Returns a new, authenticated IMAPClient instance for the account."""
    with session_scope() as db_session:
        account = db_session.query(Account).get(self.account_id)
        return self.auth_handler.connect_account(account)
def upgrade():
    from inbox.models.session import session_scope
    from sqlalchemy.ext.declarative import declarative_base
    from inbox.ignition import main_engine
    engine = main_engine(pool_size=1, max_overflow=0)

    op.alter_column('calendar', 'notes', new_column_name='description',
                    existing_type=sa.Text(), existing_nullable=True)
    op.add_column('calendar',
                  sa.Column('provider_name', sa.String(length=64),
                            nullable=False))

    op.alter_column('event', 'subject', new_column_name='title',
                    existing_type=sa.String(1024), existing_nullable=True)
    op.alter_column('event', 'body', new_column_name='description',
                    existing_type=sa.Text(), existing_nullable=True)

    # We're changing the structure of the calendar name so that
    # the provider can be split out from the name as it was previously
    # overloaded. Nobody should have any existing inbox calendars though
    # so we don't have to worry about a user with a calendar name with
    # a dash ('-') in it. These calendars are read_only as they come from
    # a provider.
    #
    # Also, any already synced events are read only as nobody has created
    # events yet.
    Base = declarative_base()
    Base.metadata.reflect(engine)

    class Calendar(Base):
        __table__ = Base.metadata.tables['calendar']

    class Event(Base):
        __table__ = Base.metadata.tables['event']

    with session_scope(versioned=False, ignore_soft_deletes=False) \
            as db_session:
        for calendar in db_session.query(Calendar):
            if calendar.name and '-' in calendar.name:
                provider_name, name = calendar.name.split('-')
                calendar.provider_name = provider_name
                calendar.name = name
                calendar.read_only = True
        for event in db_session.query(Event):
            event.read_only = True
        db_session.commit()

    op.drop_constraint('calendar_ibfk_1', 'calendar', type_='foreignkey')
    op.drop_constraint('uuid', 'calendar', type_='unique')
    op.create_unique_constraint('uuid', 'calendar',
                                ['name', 'provider_name', 'account_id'])
    op.create_foreign_key(None, "calendar", "account", ["account_id"],
                          ["id"], ondelete='CASCADE')

    op.drop_constraint('event_ibfk_2', 'event', type_='foreignkey')
    op.create_foreign_key('event_ibfk_2', 'event', 'calendar',
                          ['calendar_id'], ['id'], ondelete='CASCADE')
def condstore_refresh_flags(self, crispin_client):
    new_highestmodseq = crispin_client.conn.folder_status(
        self.folder_name, ['HIGHESTMODSEQ'])['HIGHESTMODSEQ']
    # Ensure that we have an initial highestmodseq value stored before we
    # begin polling for changes.
    if self.highestmodseq is None:
        self.highestmodseq = new_highestmodseq

    if new_highestmodseq == self.highestmodseq:
        # Don't need to do anything if the highestmodseq hasn't
        # changed.
        return
    elif new_highestmodseq < self.highestmodseq:
        # This should really never happen, but if it does, handle it.
        log.warning('got server highestmodseq less than saved '
                    'highestmodseq',
                    new_highestmodseq=new_highestmodseq,
                    saved_highestmodseq=self.highestmodseq)
        return

    log.info('HIGHESTMODSEQ has changed, getting changed UIDs',
             new_highestmodseq=new_highestmodseq,
             saved_highestmodseq=self.highestmodseq)
    crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)
    changed_flags = crispin_client.condstore_changed_flags(
        self.highestmodseq)
    remote_uids = crispin_client.all_uids()

    # In order to be able to sync changes to tens of thousands of flags at
    # once, we commit updates in batches. We do this in ascending order by
    # modseq and periodically "checkpoint" our saved highestmodseq. (It's
    # safe to checkpoint *because* we go in ascending order by modseq.)
    # That way if the process gets restarted halfway through this refresh,
    # we don't have to completely start over. It's also slow to load many
    # objects into the SQLAlchemy session and then issue lots of commits;
    # we avoid that by batching.
    flag_batches = chunk(
        sorted(changed_flags.items(), key=lambda (k, v): v.modseq),
        CONDSTORE_FLAGS_REFRESH_BATCH_SIZE)
    for flag_batch in flag_batches:
        with session_scope(self.namespace_id) as db_session:
            common.update_metadata(self.account_id, self.folder_id,
                                   self.folder_role, dict(flag_batch),
                                   db_session)
        if len(flag_batch) == CONDSTORE_FLAGS_REFRESH_BATCH_SIZE:
            interim_highestmodseq = max(v.modseq for k, v in flag_batch)
            self.highestmodseq = interim_highestmodseq

    with session_scope(self.namespace_id) as db_session:
        local_uids = common.local_uids(self.account_id, db_session,
                                       self.folder_id)
        expunged_uids = set(local_uids).difference(remote_uids)

    if expunged_uids:
        # If new UIDs have appeared since we last checked in
        # get_new_uids, save them first. We want to always have the
        # latest UIDs before expunging anything, in order to properly
        # capture draft revisions.
        with session_scope(self.namespace_id) as db_session:
            lastseenuid = common.lastseenuid(self.account_id, db_session,
                                             self.folder_id)
        if remote_uids and lastseenuid < max(remote_uids):
            log.info('Downloading new UIDs before expunging')
            self.get_new_uids(crispin_client)
        common.remove_deleted_uids(self.account_id, self.folder_id,
                                   expunged_uids)
    self.highestmodseq = new_highestmodseq
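# The `chunk` helper used above isn't defined in this excerpt. A minimal
# sketch, assuming it simply yields fixed-size batches of an iterable (the
# real implementation may differ):
def chunk(iterable, size):
    """Yield successive lists of at most `size` items from `iterable`."""
    items = list(iterable)
    for i in range(0, len(items), size):
        yield items[i:i + size]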
def folder_names(self):
    # Different providers have different names for folders, here
    # we have a default map for common name mapping, additional
    # mappings can be provided via the provider configuration file
    default_folder_map = {'INBOX': 'inbox', 'DRAFTS': 'drafts',
                          'DRAFT': 'drafts', 'JUNK': 'spam',
                          'ARCHIVE': 'archive', 'SENT': 'sent',
                          'TRASH': 'trash', 'SPAM': 'spam'}

    # Some providers also provide flags to determine common folders
    # Here we read these flags and apply the mapping
    flag_to_folder_map = {'\\Trash': 'trash', '\\Sent': 'sent',
                          '\\Drafts': 'drafts', '\\Junk': 'spam',
                          '\\Inbox': 'inbox', '\\Spam': 'spam'}

    # Additionally we provide a custom mapping for providers that
    # don't fit into the defaults.
    info = provider_info(self.provider_name)
    folder_map = info.get('folder_map', {})

    if self._folder_names is None:
        folders = self._fetch_folder_list()
        self._folder_names = dict()
        for flags, delimiter, name in folders:
            if u'\\Noselect' in flags:
                # special folders that can't contain messages
                pass
            # TODO: internationalization support
            elif name in folder_map:
                self._folder_names[folder_map[name]] = name
            elif name.upper() in default_folder_map:
                self._folder_names[default_folder_map[name.upper()]] = name
            else:
                matched = False
                for flag in flags:
                    if flag in flag_to_folder_map:
                        self._folder_names[flag_to_folder_map[flag]] = name
                        matched = True
                if not matched:
                    self._folder_names.setdefault('extra',
                                                  list()).append(name)

        # TODO: support subfolders

        # Create any needed folders that don't exist on the backend
        needed_folders = set(['inbox', 'drafts', 'sent', 'spam',
                              'trash', 'archive'])
        needed_folders -= set(self._folder_names.keys())
        for folder_id in needed_folders:
            name = folder_id.capitalize()
            self.create_folder(name)

            with session_scope() as db_session:
                account = db_session.query(Account).get(self.account_id)
                folder = Folder.find_or_create(db_session, account, name,
                                               folder_id)
                setattr(account, folder_id + '_folder', folder)
                db_session.commit()

            self._folder_names[folder_id] = name

    return self._folder_names
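# For a typical generic IMAP account, the mapping returned above might look
# roughly like the following (illustrative only; actual display names depend
# on the server and on any provider-specific 'folder_map' overrides):
#
#     {'inbox': 'INBOX',
#      'sent': 'Sent Items',
#      'drafts': 'Drafts',
#      'spam': 'Junk',
#      'trash': 'Trash',
#      'archive': 'Archive',
#      'extra': ['Receipts', 'Newsletters']}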
def __init__(
    self,
    account_id,
    namespace_id,
    folder_name,
    email_address,
    provider_name,
    syncmanager_lock,
):
    with session_scope(namespace_id) as db_session:
        try:
            folder = (
                db_session.query(Folder)
                .filter(Folder.name == folder_name,
                        Folder.account_id == account_id)
                .one()
            )
        except NoResultFound:
            raise MailsyncError(
                u"Missing Folder '{}' on account {}".format(
                    folder_name, account_id)
            )

        self.folder_id = folder.id
        self.folder_role = folder.canonical_name
        # Metric flags for sync performance
        self.is_initial_sync = folder.initial_sync_end is None
        self.is_first_sync = folder.initial_sync_start is None
        self.is_first_message = self.is_first_sync

    bind_context(self, "foldersyncengine", account_id, self.folder_id)
    self.account_id = account_id
    self.namespace_id = namespace_id
    self.folder_name = folder_name
    self.email_address = email_address

    if self.folder_name.lower() == "inbox":
        self.poll_frequency = INBOX_POLL_FREQUENCY
    else:
        self.poll_frequency = DEFAULT_POLL_FREQUENCY
    self.syncmanager_lock = syncmanager_lock
    self.state = None
    self.provider_name = provider_name
    self.last_fast_refresh = None
    self.flags_fetch_results = {}
    self.conn_pool = connection_pool(self.account_id)
    self.polling_logged_at = 0

    self.state_handlers = {
        "initial": self.initial_sync,
        "initial uidinvalid": self.resync_uids,
        "poll": self.poll,
        "poll uidinvalid": self.resync_uids,
        "finish": lambda: "finish",
    }

    self.setup_heartbeats()
    Greenlet.__init__(self)

    # Some generic IMAP servers are throwing UIDVALIDITY
    # errors forever. Instead of resyncing those servers
    # ad vitam, we keep track of the number of consecutive
    # times we got such an error and bail out if it's higher than
    # MAX_UIDINVALID_RESYNCS.
    self.uidinvalid_count = 0
def throttled(self):
    with session_scope(self.namespace_id) as db_session:
        account = db_session.query(Account).get(self.account_id)
        throttled = account.throttled
    return throttled
def upgrade():
    from inbox.ignition import main_engine
    from inbox.models.session import session_scope

    engine = main_engine(pool_size=1, max_overflow=0)
    Base = declarative_base()
    Base.metadata.reflect(engine)

    # ADD:
    op.add_column(
        "imapaccount",
        sa.Column("family_name", sa.String(length=255), nullable=True),
    )
    op.add_column(
        "imapaccount", sa.Column("g_gender", sa.String(length=16), nullable=True)
    )
    op.add_column(
        "imapaccount", sa.Column("g_locale", sa.String(length=16), nullable=True)
    )
    op.add_column(
        "imapaccount",
        sa.Column("g_picture_url", sa.String(length=255), nullable=True),
    )
    op.add_column(
        "imapaccount", sa.Column("g_plus_url", sa.String(length=255), nullable=True)
    )
    op.add_column(
        "imapaccount", sa.Column("given_name", sa.String(length=255), nullable=True)
    )
    op.add_column(
        "imapaccount", sa.Column("google_id", sa.String(length=255), nullable=True)
    )

    # MOVE:
    class Account_(Base):
        __table__ = Base.metadata.tables["account"]

    with session_scope() as db_session:
        results = db_session.query(
            Account_.id,
            Account_.family_name,
            Account_.google_id,
            Account_.g_plus_url,
            Account_.g_picture_url,
            Account_.g_gender,
            Account_.given_name,
            Account_.g_locale,
        ).all()

    imapaccount = table(
        "imapaccount",
        column("id", sa.String),
        column("family_name", sa.String),
        column("google_id", sa.String),
        column("g_plus_url", sa.String),
        column("g_picture_url", sa.String),
        column("g_gender", sa.String),
        column("given_name", sa.String),
        column("g_locale", sa.String),
    )
    for r in results:
        op.execute(
            imapaccount.update()
            .where(imapaccount.c.id == r[0])
            .values(
                {
                    "family_name": r[1],
                    "google_id": r[2],
                    "g_plus_url": r[3],
                    "g_picture_url": r[4],
                    "g_gender": r[5],
                    "given_name": r[6],
                    "g_locale": r[7],
                }
            )
        )

    # DROP:
    op.drop_column("account", "family_name")
    op.drop_column("account", "google_id")
    op.drop_column("account", "g_plus_url")
    op.drop_column("account", "g_picture_url")
    op.drop_column("account", "g_gender")
    op.drop_column("account", "given_name")
    op.drop_column("account", "g_locale")
def _batch_delete(engine, table, column_id_filters, account_id,
                  throttle=False, dry_run=False):
    (column, id_) = column_id_filters
    count = engine.execute("SELECT COUNT(*) FROM {} WHERE {}={};".format(
        table, column, id_)).scalar()

    if count == 0:
        log.info("Completed batch deletion", table=table)
        return

    batches = int(math.ceil(float(count) / CHUNK_SIZE))

    log.info("Starting batch deletion", table=table, count=count,
             batches=batches)
    start = time.time()

    if table in ("message", "block"):
        query = ""
    else:
        query = "DELETE FROM {} WHERE {}={} LIMIT {};".format(
            table, column, id_, CHUNK_SIZE)

    log.info("deleting", account_id=account_id, table=table)

    for _ in range(0, batches):
        if throttle:
            bulk_throttle()

        if table == "block":
            with session_scope(account_id) as db_session:
                blocks = list(
                    db_session.query(Block.id, Block.data_sha256).filter(
                        Block.namespace_id == id_).limit(CHUNK_SIZE))
            block_ids = [b[0] for b in blocks]
            block_hashes = [b[1] for b in blocks]

            # XXX: We currently don't check for existing blocks.
            if dry_run is False:
                delete_from_blockstore(*block_hashes)

            with session_scope(account_id) as db_session:
                query = db_session.query(Block).filter(
                    Block.id.in_(block_ids))
                if dry_run is False:
                    query.delete(synchronize_session=False)

        elif table == "message":
            with session_scope(account_id) as db_session:
                # messages must be ordered by the foreign key
                # `received_date`, otherwise MySQL will raise an error when
                # deleting from the message table
                messages = list(
                    db_session.query(Message.id, Message.data_sha256).filter(
                        Message.namespace_id == id_).order_by(
                            desc(Message.received_date)).limit(
                                CHUNK_SIZE).with_hint(
                                    Message,
                                    "use index "
                                    "(ix_message_namespace_id_received_date)"))

            message_ids = [m[0] for m in messages]
            message_hashes = [m[1] for m in messages]

            with session_scope(account_id) as db_session:
                existing_hashes = list(
                    db_session.query(Message.data_sha256).filter(
                        Message.data_sha256.in_(message_hashes)).filter(
                            Message.namespace_id != id_).distinct())
            existing_hashes = [h[0] for h in existing_hashes]

            remove_hashes = set(message_hashes) - set(existing_hashes)
            if dry_run is False:
                delete_from_blockstore(*list(remove_hashes))

            with session_scope(account_id) as db_session:
                query = db_session.query(Message).filter(
                    Message.id.in_(message_ids))
                if dry_run is False:
                    query.delete(synchronize_session=False)

        else:
            if dry_run is False:
                engine.execute(query)
            else:
                log.debug(query)

    end = time.time()
    log.info("Completed batch deletion", time=end - start, table=table)

    count = engine.execute("SELECT COUNT(*) FROM {} WHERE {}={};".format(
        table, column, id_)).scalar()

    if dry_run is False:
        assert count == 0
def delete_namespace(namespace_id, throttle=False, dry_run=False):
    """
    Delete all the data associated with a namespace from the database.
    USE WITH CAUTION.

    NOTE: This function is only called from bin/delete-account-data.
    It prints to stdout.

    Raises AccountDeletionErrror with a message if there are problems.
    """
    with session_scope(namespace_id) as db_session:
        try:
            account = (db_session.query(Account).join(Namespace).filter(
                Namespace.id == namespace_id).one())
        except NoResultFound:
            raise AccountDeletionErrror("Could not find account in database")

        if not account.is_marked_for_deletion:
            raise AccountDeletionErrror(
                "Account is_marked_for_deletion is False. "
                "Change this to proceed with deletion.")
        account_id = account.id
        account_discriminator = account.discriminator

    log.info("Deleting account", account_id=account_id)
    start_time = time.time()

    # These filters are used to configure batch deletion in chunks for
    # specific tables that are prone to transaction blocking during
    # large concurrent write volume. See _batch_delete.
    # NOTE: ImapFolderInfo doesn't really fall into this category but
    # we include it here for simplicity anyway.
    filters = OrderedDict()
    for table in [
        "message", "block", "thread", "transaction", "actionlog",
        "event", "contact", "dataprocessingcache",
    ]:
        filters[table] = ("namespace_id", namespace_id)

    if account_discriminator == "easaccount":
        filters["easuid"] = ("easaccount_id", account_id)
        filters["easfoldersyncstatus"] = ("account_id", account_id)
    else:
        filters["imapuid"] = ("account_id", account_id)
        filters["imapfoldersyncstatus"] = ("account_id", account_id)
        filters["imapfolderinfo"] = ("account_id", account_id)

    from inbox.ignition import engine_manager
    # Bypass the ORM for performant bulk deletion;
    # we do /not/ want Transaction records created for these deletions,
    # so this is okay.
    engine = engine_manager.get_for_id(namespace_id)

    for cls in filters:
        _batch_delete(engine, cls, filters[cls], account_id,
                      throttle=throttle, dry_run=dry_run)

    # Use a single delete for the other tables. Rows from tables which contain
    # cascade-deleted foreign keys to other tables deleted here (or above)
    # are also not always explicitly deleted, except where needed for
    # performance.
    #
    # NOTE: Namespace, Account are deleted at the end too.
    query = "DELETE FROM {} WHERE {}={};"

    filters = OrderedDict()
    for table in ("category", "calendar"):
        filters[table] = ("namespace_id", namespace_id)
    for table in ("folder", "label"):
        filters[table] = ("account_id", account_id)
    filters["namespace"] = ("id", namespace_id)

    for table, (column, id_) in iteritems(filters):
        log.info("Performing bulk deletion", table=table)
        start = time.time()

        if throttle:
            bulk_throttle()

        if not dry_run:
            engine.execute(query.format(table, column, id_))
        else:
            log.debug(query.format(table, column, id_))

        end = time.time()
        log.info("Completed bulk deletion", table=table, time=end - start)

    # Delete the account object manually to get rid of the various objects
    # associated with it (e.g.: secrets, tokens, etc.)
    with session_scope(account_id) as db_session:
        account = db_session.query(Account).get(account_id)
        if dry_run is False:
            db_session.delete(account)
            db_session.commit()

    # Delete liveness data (heartbeats)
    log.debug("Deleting liveness data", account_id=account_id)
    clear_heartbeat_status(account_id)

    statsd_client.timing("mailsync.account_deletion.queue.deleted",
                         time.time() - start_time)
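# A hedged example of how delete_namespace might be driven from a maintenance
# script such as bin/delete-account-data. The import path and the helper name
# below are assumptions for illustration only; a dry run first only logs the
# DELETE statements without executing anything.
def purge_namespace_sketch(namespace_id):
    from inbox.models.util import delete_namespace  # assumed module path

    delete_namespace(namespace_id, throttle=True, dry_run=True)
    # Once the dry-run output looks right, repeat for real.
    delete_namespace(namespace_id, throttle=True, dry_run=False)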
def start_hook(self, hook_public_id): with session_scope() as db_session: hook = db_session.query(Webhook). \ filter_by(public_id=hook_public_id).one() self._start_hook(hook, db_session)
def _report_initial_sync_end(self): with session_scope(self.namespace_id) as db_session: q = db_session.query(Folder).get(self.folder_id) q.initial_sync_end = datetime.utcnow()
def _get_access_token(self): with session_scope() as db_session: acc = db_session.query(Account).get(self.account_id) # This will raise OAuthError if OAuth access was revoked. The # BaseSyncMonitor loop will catch this, clean up, and exit. return token_manager.get_token(acc)
def _run_impl(self): old_state = self.state try: self.state = self.state_handlers[old_state]() self.heartbeat_status.publish(state=self.state) except UidInvalid: self.state = self.state + " uidinvalid" self.uidinvalid_count += 1 self.heartbeat_status.publish(state=self.state) # Check that we're not stuck in an endless uidinvalidity resync loop. if self.uidinvalid_count > MAX_UIDINVALID_RESYNCS: log.error( "Resynced more than MAX_UIDINVALID_RESYNCS in a" " row. Stopping sync.", folder_name=self.folder_name, ) # Only stop syncing the entire account if the INBOX folder is # failing. Otherwise simply stop syncing the folder. if self.folder_name.lower() == "inbox": with session_scope(self.namespace_id) as db_session: account = db_session.query(Account).get( self.account_id) account.disable_sync("Detected endless uidvalidity " "resync loop") account.sync_state = "stopped" db_session.commit() raise MailsyncDone() else: self.state = "finish" self.heartbeat_status.publish(state=self.state) except FolderMissingError: # Folder was deleted by monitor while its sync was running. # TODO: Monitor should handle shutting down the folder engine. log.info( "Folder disappeared. Stopping sync.", account_id=self.account_id, folder_id=self.folder_id, ) raise MailsyncDone() except ValidationError as exc: log.error( "Error authenticating; stopping sync", exc_info=True, account_id=self.account_id, folder_id=self.folder_id, logstash_tag="mark_invalid", ) with session_scope(self.namespace_id) as db_session: account = db_session.query(Account).get(self.account_id) account.mark_invalid() account.update_sync_error(exc) raise MailsyncDone() # State handlers are idempotent, so it's okay if we're # killed between the end of the handler and the commit. if self.state != old_state: def update(status): status.state = self.state self.update_folder_sync_status(update) if self.state == old_state and self.state in ["initial", "poll"]: # We've been through a normal state transition without raising any # error. It's safe to reset the uidvalidity counter. self.uidinvalid_count = 0
def delete_marked_accounts(shard_id, throttle=False, dry_run=False):
    start = time.time()
    deleted_count = 0
    ids_to_delete = []

    with session_scope_by_shard_id(shard_id) as db_session:
        ids_to_delete = [(acc.id, acc.namespace.id)
                         for acc in db_session.query(Account)
                         if acc.is_deleted]

    queue_size = len(ids_to_delete)
    for account_id, namespace_id in ids_to_delete:
        # queue_size = number of accounts waiting to be deleted
        # deleted_count = number of accounts deleted so far in this loop;
        # we track it separately because the length of ids_to_delete doesn't
        # change as we iterate
        statsd_client.gauge(
            'mailsync.{}.account_deletion.queue.length'.format(shard_id),
            queue_size - deleted_count)
        try:
            with session_scope(namespace_id) as db_session:
                account = db_session.query(Account).get(account_id)
                if not account:
                    log.critical('Account does not exist',
                                 account_id=account_id)
                    continue
                if account.sync_should_run or not account.is_deleted:
                    log.warn('Account NOT marked for deletion. '
                             'Will not delete', account_id=account_id)
                    continue

            log.info('Deleting account', account_id=account_id)
            start_time = time.time()

            # Delete data in database
            try:
                log.info('Deleting database data', account_id=account_id)
                delete_namespace(account_id, namespace_id, throttle=throttle,
                                 dry_run=dry_run)
            except Exception as e:
                log.critical('Database data deletion failed', error=e,
                             account_id=account_id)
                continue

            # Delete liveness data
            log.debug('Deleting liveness data', account_id=account_id)
            clear_heartbeat_status(account_id)

            deleted_count += 1
            statsd_client.incr('mailsync.account_deletion.queue.deleted', 1)
            statsd_client.timing('mailsync.account_deletion.queue.deleted',
                                 time.time() - start_time)
        except Exception:
            log_uncaught_errors(log, account_id=account_id)

    end = time.time()
    log.info('All data deleted successfully', shard_id=shard_id,
             time=end - start, count=deleted_count)
def delete_namespace(account_id, namespace_id, dry_run=False): """ Delete all the data associated with a namespace from the database. USE WITH CAUTION. NOTE: This function is only called from bin/delete-account-data. It prints to stdout. """ from inbox.models.session import session_scope from inbox.models import Account from inbox.ignition import engine_manager # Bypass the ORM for performant bulk deletion; # we do /not/ want Transaction records created for these deletions, # so this is okay. engine = engine_manager.get_for_id(namespace_id) # Chunk delete for tables that might have a large concurrent write volume # to prevent those transactions from blocking. # NOTE: ImapFolderInfo does not fall into this category but we include it # here for simplicity. filters = OrderedDict() for table in [ 'message', 'block', 'thread', 'transaction', 'actionlog', 'contact', 'event', 'dataprocessingcache' ]: filters[table] = ('namespace_id', namespace_id) with session_scope(namespace_id) as db_session: account = db_session.query(Account).get(account_id) if account.discriminator != 'easaccount': filters['imapuid'] = ('account_id', account_id) filters['imapfoldersyncstatus'] = ('account_id', account_id) filters['imapfolderinfo'] = ('account_id', account_id) else: filters['easuid'] = ('easaccount_id', account_id) filters['easfoldersyncstatus'] = ('account_id', account_id) for cls in filters: _batch_delete(engine, cls, filters[cls], dry_run=dry_run) # Use a single delete for the other tables. Rows from tables which contain # cascade-deleted foreign keys to other tables deleted here (or above) # are also not always explicitly deleted, except where needed for # performance. # # NOTE: Namespace, Account are deleted at the end too. query = 'DELETE FROM {} WHERE {}={};' filters = OrderedDict() for table in ('category', 'calendar'): filters[table] = ('namespace_id', namespace_id) for table in ('folder', 'label'): filters[table] = ('account_id', account_id) filters['namespace'] = ('id', namespace_id) for table, (column, id_) in filters.iteritems(): print 'Performing bulk deletion for table: {}'.format(table) start = time.time() if not dry_run: engine.execute(query.format(table, column, id_)) else: print query.format(table, column, id_) end = time.time() print 'Completed bulk deletion for table: {}, time taken: {}'.\ format(table, end - start) # Delete the account object manually to get rid of the various objects # associated with it (e.g: secrets, tokens, etc.) with session_scope(account_id) as db_session: account = db_session.query(Account).get(account_id) if dry_run is False: db_session.delete(account) db_session.commit()
def condstore_base_poll(crispin_client, log, folder_name, shared_state,
                        highestmodseq_fn):
    """
    Base polling logic for IMAP servers which support CONDSTORE and IDLE.

    The CONDSTORE / HIGHESTMODSEQ mechanism is used to detect new and changed
    messages that need syncing.

    """
    log.bind(state='poll')

    with session_scope(ignore_soft_deletes=False) as db_session:
        saved_folder_info = account.get_folder_info(crispin_client.account_id,
                                                    db_session, folder_name)
        saved_highestmodseq = saved_folder_info.highestmodseq

    # Start a session since we're going to IDLE below anyway...
    # This also resets the folder name cache, which we want in order to
    # detect folder/label additions and deletions.
    status = crispin_client.select_folder(
        folder_name, uidvalidity_cb(crispin_client.account_id))

    log.debug(current_modseq=status['HIGHESTMODSEQ'],
              saved_modseq=saved_highestmodseq)

    if status['HIGHESTMODSEQ'] > saved_highestmodseq:
        with session_scope(ignore_soft_deletes=False) as db_session:
            acc = db_session.query(ImapAccount).get(crispin_client.account_id)
            save_folder_names(log, acc, crispin_client.folder_names(),
                              db_session)
        highestmodseq_update(crispin_client, log, folder_name,
                             saved_highestmodseq, highestmodseq_fn,
                             shared_state['syncmanager_lock'])

    # We really only want to idle on a folder for new messages. Idling on
    # `All Mail` won't tell us when messages are archived from the Inbox.
    if folder_name.lower() in IDLE_FOLDERS:
        status = crispin_client.select_folder(
            folder_name, uidvalidity_cb(crispin_client.account_id))

        idle_frequency = 1800  # 30min

        log.info('idling', timeout=idle_frequency)
        crispin_client.conn.idle()
        crispin_client.conn.idle_check(timeout=idle_frequency)

        # If we wanted to act on the IDLE response we could inspect it here,
        # but it's of limited use because it reports sequence IDs instead of
        # UIDs, e.g.:
        # resp = c.idle_check(timeout=shared_state['poll_frequency'])
        # r = dict(EXISTS=[], EXPUNGE=[])
        # for msg_uid, cmd in resp:
        #     r[cmd].append(msg_uid)
        # print r

        crispin_client.conn.idle_done()
        log.info('IDLE triggered poll')
    else:
        log.info('IDLE sleeping', seconds=shared_state['poll_frequency'])
        sleep(shared_state['poll_frequency'])

    return 'poll'
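# The idle / idle_check / idle_done dance above comes from the imapclient
# library that the crispin connection wraps. A standalone sketch of the same
# pattern, with placeholder host and credentials; note that idle_check()
# reports sequence numbers, not UIDs.
from imapclient import IMAPClient


def idle_once_sketch(host, username, password, folder='INBOX', timeout=1800):
    conn = IMAPClient(host, ssl=True)
    conn.login(username, password)
    conn.select_folder(folder)
    conn.idle()
    try:
        # Blocks for up to `timeout` seconds waiting for untagged responses
        # such as EXISTS/EXPUNGE/FETCH on the selected folder.
        responses = conn.idle_check(timeout=timeout)
    finally:
        conn.idle_done()
        conn.logout()
    return responses  # e.g. [(4, 'EXISTS')]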
def upgrade(): easupdate = False print 'Creating new tables and columns...' op.create_table( 'folder', sa.Column('id', sa.Integer(), nullable=False), sa.Column('account_id', sa.Integer(), nullable=False), sa.Column('name', sa.String(length=191, collation='utf8mb4_general_ci'), nullable=True), sa.ForeignKeyConstraint(['account_id'], ['account.id'], ondelete='CASCADE'), sa.PrimaryKeyConstraint('id'), sa.UniqueConstraint('account_id', 'name')) op.create_table( 'internaltag', sa.Column('id', sa.Integer(), nullable=False), sa.Column('public_id', mysql.BINARY(16), nullable=False), sa.Column('namespace_id', sa.Integer(), nullable=False), sa.Column('name', sa.String(length=191), nullable=False), sa.Column('thread_id', sa.Integer(), nullable=False), sa.ForeignKeyConstraint(['namespace_id'], ['namespace.id'], ondelete='CASCADE'), sa.ForeignKeyConstraint(['thread_id'], ['thread.id'], ondelete='CASCADE'), sa.PrimaryKeyConstraint('id'), sa.UniqueConstraint('namespace_id', 'name')) op.add_column('folderitem', sa.Column('folder_id', sa.Integer(), nullable=True)) op.create_foreign_key("fk_folder_id", "folderitem", "folder", ["folder_id"], ["id"], ondelete='CASCADE') op.add_column('account', sa.Column('inbox_folder_id', sa.Integer, nullable=True)) op.add_column('account', sa.Column('sent_folder_id', sa.Integer, nullable=True)) op.add_column('account', sa.Column('drafts_folder_id', sa.Integer, nullable=True)) op.add_column('account', sa.Column('spam_folder_id', sa.Integer, nullable=True)) op.add_column('account', sa.Column('trash_folder_id', sa.Integer, nullable=True)) op.add_column('account', sa.Column('archive_folder_id', sa.Integer, nullable=True)) op.add_column('account', sa.Column('all_folder_id', sa.Integer, nullable=True)) op.add_column('account', sa.Column('starred_folder_id', sa.Integer, nullable=True)) op.create_foreign_key('account_ibfk_2', 'account', 'folder', ['inbox_folder_id'], ['id']) op.create_foreign_key('account_ibfk_3', 'account', 'folder', ['sent_folder_id'], ['id']) op.create_foreign_key('account_ibfk_4', 'account', 'folder', ['drafts_folder_id'], ['id']) op.create_foreign_key('account_ibfk_5', 'account', 'folder', ['spam_folder_id'], ['id']) op.create_foreign_key('account_ibfk_6', 'account', 'folder', ['trash_folder_id'], ['id']) op.create_foreign_key('account_ibfk_7', 'account', 'folder', ['archive_folder_id'], ['id']) op.create_foreign_key('account_ibfk_8', 'account', 'folder', ['all_folder_id'], ['id']) op.create_foreign_key('account_ibfk_9', 'account', 'folder', ['starred_folder_id'], ['id']) op.add_column('imapuid', sa.Column('folder_id', sa.Integer, nullable=True)) op.create_foreign_key('imapuid_ibfk_3', 'imapuid', 'folder', ['folder_id'], ['id']) from inbox.models.session import session_scope from inbox.ignition import main_engine engine = main_engine(pool_size=1, max_overflow=0) Base = declarative_base() Base.metadata.reflect(engine) if 'easuid' in Base.metadata.tables: easupdate = True print 'Adding new EASUid columns...' 
op.add_column('easuid', sa.Column('fld_uid', sa.Integer(), nullable=True)) op.add_column('easuid', sa.Column('folder_id', sa.Integer(), nullable=True)) op.create_foreign_key('easuid_ibfk_3', 'easuid', 'folder', ['folder_id'], ['id']) op.create_unique_constraint( 'uq_easuid_folder_id_msg_uid_easaccount_id', 'easuid', ['folder_id', 'msg_uid', 'easaccount_id']) op.create_index('easuid_easaccount_id_folder_id', 'easuid', ['easaccount_id', 'folder_id']) # Include our changes to the EASUid table: Base = declarative_base() Base.metadata.reflect(engine) class Folder(Base): __table__ = Base.metadata.tables['folder'] account = relationship('Account', foreign_keys='Folder.account_id', backref='folders') class FolderItem(Base): __table__ = Base.metadata.tables['folderitem'] folder = relationship('Folder', backref='threads', lazy='joined') class Thread(Base): __table__ = Base.metadata.tables['thread'] folderitems = relationship('FolderItem', backref="thread", single_parent=True, cascade='all, delete, delete-orphan') namespace = relationship('Namespace', backref='threads') class Namespace(Base): __table__ = Base.metadata.tables['namespace'] account = relationship('Account', backref=backref('namespace', uselist=False)) class Account(Base): __table__ = Base.metadata.tables['account'] inbox_folder = relationship('Folder', foreign_keys='Account.inbox_folder_id') sent_folder = relationship('Folder', foreign_keys='Account.sent_folder_id') drafts_folder = relationship('Folder', foreign_keys='Account.drafts_folder_id') spam_folder = relationship('Folder', foreign_keys='Account.spam_folder_id') trash_folder = relationship('Folder', foreign_keys='Account.trash_folder_id') starred_folder = relationship('Folder', foreign_keys='Account.starred_folder_id') archive_folder = relationship('Folder', foreign_keys='Account.archive_folder_id') all_folder = relationship('Folder', foreign_keys='Account.all_folder_id') class ImapUid(Base): __table__ = Base.metadata.tables['imapuid'] folder = relationship('Folder', backref='imapuids', lazy='joined') if easupdate: class EASUid(Base): __table__ = Base.metadata.tables['easuid'] folder = relationship('Folder', foreign_keys='EASUid.folder_id', backref='easuids', lazy='joined') print 'Creating Folder rows and migrating FolderItems...' # not many folders per account, so shouldn't grow that big with session_scope(versioned=False, ignore_soft_deletes=False) as db_session: folders = dict([((i.account_id, i.name), i) for i in db_session.query(Folder).all()]) count = 0 for folderitem in db_session.query(FolderItem).join(Thread).join( Namespace).yield_per(CHUNK_SIZE): account_id = folderitem.thread.namespace.account_id if folderitem.thread.namespace.account.provider == 'gmail': if folderitem.folder_name in folder_name_subst_map: new_folder_name = folder_name_subst_map[ folderitem.folder_name] else: new_folder_name = folderitem.folder_name elif folderitem.thread.namespace.account.provider == 'eas': new_folder_name = folderitem.folder_name.title() if (account_id, new_folder_name) in folders: f = folders[(account_id, new_folder_name)] else: f = Folder(account_id=account_id, name=new_folder_name) folders[(account_id, new_folder_name)] = f folderitem.folder = f count += 1 if count > CHUNK_SIZE: db_session.commit() count = 0 db_session.commit() print 'Migrating ImapUids to reference Folder rows...' 
for imapuid in db_session.query(ImapUid).yield_per(CHUNK_SIZE): account_id = imapuid.imapaccount_id if imapuid.folder_name in folder_name_subst_map: new_folder_name = folder_name_subst_map[imapuid.folder_name] else: new_folder_name = imapuid.folder_name if (account_id, new_folder_name) in folders: f = folders[(account_id, new_folder_name)] else: f = Folder(account_id=account_id, name=new_folder_name) folders[(account_id, new_folder_name)] = f imapuid.folder = f count += 1 if count > CHUNK_SIZE: db_session.commit() count = 0 db_session.commit() if easupdate: print 'Migrating EASUids to reference Folder rows...' for easuid in db_session.query(EASUid).yield_per(CHUNK_SIZE): account_id = easuid.easaccount_id new_folder_name = easuid.folder_name if (account_id, new_folder_name) in folders: f = folders[(account_id, new_folder_name)] else: f = Folder(account_id=account_id, name=new_folder_name) folders[(account_id, new_folder_name)] = f easuid.folder = f count += 1 if count > CHUNK_SIZE: db_session.commit() count = 0 db_session.commit() print 'Migrating *_folder_name fields to reference Folder rows...' for account in db_session.query(Account).filter_by(provider='gmail'): if account.inbox_folder_name: # hard replace INBOX with canonicalized caps k = (account.id, 'Inbox') if k in folders: account.inbox_folder = folders[k] else: account.inbox_folder = Folder( account_id=account.id, name=folder_name_subst_map[account.inbox_folder_name]) if account.sent_folder_name: k = (account.id, account.sent_folder_name) if k in folders: account.sent_folder = folders[k] else: account.sent_folder = Folder(account_id=account.id, name=account.sent_folder_name) if account.drafts_folder_name: k = (account.id, account.drafts_folder_name) if k in folders: account.drafts_folder = folders[k] else: account.drafts_folder = Folder( account_id=account.id, name=account.drafts_folder_name) # all/archive mismatch is intentional; semantics have changed if account.archive_folder_name: k = (account.id, account.archive_folder_name) if k in folders: account.all_folder = folders[k] else: account.all_folder = Folder( account_id=account.id, name=account.archive_folder_name) db_session.commit() if easupdate: print "Migrating EAS accounts' *_folder_name fields to reference "\ "Folder rows..." 
for account in db_session.query(Account).filter_by(provider='eas'): if account.inbox_folder_name: k = (account.id, account.inbox_folder_name) if k in folders: account.inbox_folder = folders[k] else: account.inbox_folder = Folder( account_id=account.id, name=account.inbox_folder_name) if account.sent_folder_name: k = (account.id, account.sent_folder_name) if k in folders: account.sent_folder = folders[k] else: account.sent_folder = Folder( account_id=account.id, name=account.sent_folder_name) if account.drafts_folder_name: k = (account.id, account.drafts_folder_name) if k in folders: account.drafts_folder = folders[k] else: account.drafts_folder = Folder( account_id=account.id, name=account.drafts_folder_name) if account.archive_folder_name: k = (account.id, account.archive_folder_name) if k in folders: account.archive_folder = folders[k] else: account.archive_folder = Folder( account_id=account.id, name=account.archive_folder_name) db_session.commit() print 'Final schema tweaks and new constraint enforcement' op.alter_column('folderitem', 'folder_id', existing_type=sa.Integer(), nullable=False) op.drop_constraint('folder_name', 'folderitem', type_='unique') op.drop_constraint('folder_name', 'imapuid', type_='unique') op.create_unique_constraint('uq_imapuid_folder_id_msg_uid_imapaccount_id', 'imapuid', ['folder_id', 'msg_uid', 'imapaccount_id']) op.drop_column('folderitem', 'folder_name') op.drop_column('imapuid', 'folder_name') op.drop_column('account', 'inbox_folder_name') op.drop_column('account', 'drafts_folder_name') op.drop_column('account', 'sent_folder_name') op.drop_column('account', 'archive_folder_name') if easupdate: print 'Dropping old EASUid columns...' op.drop_constraint('folder_name', 'easuid', type_='unique') op.drop_index('easuid_easaccount_id_folder_name', 'easuid') op.drop_column('easuid', 'folder_name')
def check_new_uids(account_id, folder_name, log, uid_download_stack,
                   poll_frequency, syncmanager_lock):
    """ Check for new UIDs and add them to the download stack.

    We do this by comparing local UID lists to remote UID lists, maintaining
    the invariant that (stack uids)+(local uids) == (remote uids).

    We also remove local messages that have disappeared from the remote, since
    it's totally probable that users will be archiving mail as the initial
    sync goes on.

    We grab a new IMAP connection from the pool for this to isolate its
    actions from whatever the main greenlet may be doing.

    Runs until killed. (Intended to be run in a greenlet.)
    """
    log.info("starting new UID-check poller")
    with _pool(account_id).get() as crispin_client:
        crispin_client.select_folder(
            folder_name, uidvalidity_cb(crispin_client.account_id))
        while True:
            remote_uids = set(crispin_client.all_uids())
            # We lock this section to make sure no messages are being
            # created while we make sure the queue is in a good state.
            with syncmanager_lock:
                log.debug("check_new_uids acquired syncmanager_lock")
                with session_scope(ignore_soft_deletes=False) as db_session:
                    local_uids = set(
                        account.all_uids(account_id, db_session, folder_name))
                    stack_uids = set(uid_download_stack.queue)
                    local_with_pending_uids = local_uids | stack_uids
                    deleted_uids = remove_deleted_uids(
                        account_id, db_session, log, folder_name, local_uids,
                        remote_uids)
                    log.info('removed deleted uids',
                             count=len(deleted_uids))

                    # filter out messages that have disappeared on the
                    # remote side
                    new_uid_download_stack = {
                        u for u in uid_download_stack.queue
                        if u in remote_uids
                    }

                    # add in any new uids from the remote
                    for uid in remote_uids:
                        if uid not in local_with_pending_uids:
                            log.debug("adding new message {} to download "
                                      "queue".format(uid))
                            new_uid_download_stack.add(uid)
                    uid_download_stack.queue = sorted(new_uid_download_stack,
                                                      key=int)

                    update_uid_counts(
                        db_session, log, crispin_client.account_id,
                        folder_name, remote_uid_count=len(remote_uids),
                        download_uid_count=uid_download_stack.qsize(),
                        delete_uid_count=len(deleted_uids))

            sleep(poll_frequency)
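# The queue maintenance above reduces to set arithmetic on three UID sets,
# keeping the invariant (stack uids) + (local uids) == (remote uids). A tiny
# sketch with toy UID strings; the helper name is illustrative.
def reconcile_uids_sketch(remote_uids, local_uids, stack_uids):
    # UIDs that vanished from the server should be purged locally...
    deleted = local_uids - remote_uids
    # ...queued downloads for vanished messages are dropped...
    still_pending = stack_uids & remote_uids
    # ...and anything remote that is neither synced nor queued gets queued.
    new = remote_uids - (local_uids | stack_uids)
    return deleted, sorted(still_pending | new, key=int)


# Example: UID 3 was deleted remotely, UID 5 is new, UID 4 stays queued.
assert reconcile_uids_sketch({'1', '2', '4', '5'}, {'1', '2', '3'}, {'4'}) == \
    ({'3'}, ['4', '5'])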
def _smtp_oauth2_try_refresh(self): with session_scope(self.account_id) as db_session: account = db_session.query(ImapAccount).get(self.account_id) self.auth_token = token_manager.get_token(account, force_refresh=True)
def base_poll(account_id, provider_instance, last_sync_fn, target_obj,
              set_last_sync_fn):
    """Query a remote provider for updates and persist them to the
    database.

    Parameters
    ----------
    account_id: int
        ID for the account whose items should be queried.
    provider_instance:
        Interface to the remote item data provider. Must have a
        PROVIDER_NAME attribute and implement the get_items() method.
    last_sync_fn:
        Callable returning the datetime of the account's last successful
        sync, or None if it has never synced.
    target_obj:
        The local model class to query and instantiate for synced items.
    set_last_sync_fn:
        Callable that records on the account that a sync just completed.
    """
    log = logger.new(account_id=account_id)
    provider_name = provider_instance.PROVIDER_NAME
    with session_scope() as db_session:
        account = db_session.query(Account).get(account_id)
        last_sync = or_none(last_sync_fn(account),
                            datetime.datetime.isoformat)

    items = provider_instance.get_items(last_sync)
    with session_scope() as db_session:
        account = db_session.query(Account).get(account_id)
        change_counter = Counter()
        to_commit = []
        for item in items:
            item.namespace = account.namespace
            assert item.uid is not None, 'Got remote item with null uid'
            assert isinstance(item.uid, str)

            matching_items = db_session.query(target_obj).filter(
                target_obj.namespace == account.namespace,
                target_obj.provider_name == provider_name,
                target_obj.uid == item.uid)
            # Snapshot of item data from immediately after last sync:
            cached_item = matching_items. \
                filter(target_obj.source == 'remote').first()

            # Item data reflecting any local modifications since the last
            # sync with the remote provider:
            local_item = matching_items. \
                filter(target_obj.source == 'local').first()

            # If the remote item was deleted, purge the corresponding
            # database entries.
            if item.deleted:
                if cached_item is not None:
                    db_session.delete(cached_item)
                    change_counter['deleted'] += 1
                if local_item is not None:
                    db_session.delete(local_item)
                continue

            # Otherwise, update the database.
            if cached_item is not None:
                # The provider gave an update to an item we already have.
                if local_item is not None:
                    try:
                        # Attempt to merge remote updates into local_item
                        local_item.merge_from(cached_item, item)
                        # And update cached_item to reflect both local and
                        # remote updates
                        cached_item.copy_from(local_item)
                    except MergeError:
                        log.error('Conflicting local and remote updates '
                                  'to item.',
                                  local=local_item, cached=cached_item,
                                  remote=item)
                        # For now, just don't update if there's a conflict.
                        continue
                else:
                    log.warning('Item already present as remote but not '
                                'local item', cached_item=cached_item)
                cached_item.copy_from(item)
                change_counter['updated'] += 1
            else:
                # This is a new item; create both local and remote DB
                # entries.
                local_item = target_obj()
                local_item.copy_from(item)
                local_item.source = 'local'
                to_commit.append(item)
                to_commit.append(local_item)
                change_counter['added'] += 1

        set_last_sync_fn(account)

        log.info('sync', added=change_counter['added'],
                 updated=change_counter['updated'],
                 deleted=change_counter['deleted'])

        db_session.add_all(to_commit)
        db_session.commit()
def highestmodseq_update(crispin_client, log, folder_name, last_highestmodseq, highestmodseq_fn, syncmanager_lock): account_id = crispin_client.account_id new_highestmodseq = crispin_client.selected_highestmodseq new_uidvalidity = crispin_client.selected_uidvalidity log.info('starting highestmodseq update', current_highestmodseq=new_highestmodseq) changed_uids = crispin_client.new_and_updated_uids(last_highestmodseq) remote_uids = crispin_client.all_uids() local_uids = None if changed_uids: with session_scope(ignore_soft_deletes=False) as db_session: local_uids = account.all_uids(account_id, db_session, folder_name) new, updated = new_or_updated(changed_uids, local_uids) log.info(new_uid_count=len(new), updated_uid_count=len(updated)) local_uids += new with syncmanager_lock: log.debug("highestmodseq_update acquired syncmanager_lock") with session_scope(ignore_soft_deletes=False) as db_session: deleted_uids = remove_deleted_uids(account_id, db_session, log, folder_name, local_uids, remote_uids) local_uids = set(local_uids) - deleted_uids update_metadata(crispin_client, log, folder_name, updated, syncmanager_lock) with session_scope(ignore_soft_deletes=False) as db_session: update_uid_counts(db_session, log, account_id, folder_name, remote_uid_count=len(remote_uids), download_uid_count=len(new), update_uid_count=len(updated), delete_uid_count=len(deleted_uids)) highestmodseq_fn(crispin_client, log, folder_name, new, updated, syncmanager_lock) else: log.info("No new or updated messages") with session_scope(ignore_soft_deletes=False) as db_session: with syncmanager_lock: log.debug("highestmodseq_update acquired syncmanager_lock") if local_uids is None: local_uids = account.all_uids(account_id, db_session, folder_name) deleted_uids = remove_deleted_uids(crispin_client.account_id, db_session, log, folder_name, local_uids, remote_uids) update_uid_counts(db_session, log, account_id, folder_name, remote_uid_count=len(remote_uids), delete_uid_count=len(deleted_uids)) account.update_folder_info(account_id, db_session, folder_name, new_uidvalidity, new_highestmodseq) db_session.commit()
def syncback_worker(semaphore, action, action_log_id, record_id, account_id, syncback_service, retry_interval=30, extra_args=None): func = ACTION_FUNCTION_MAP[action] with semaphore: log = logger.new(record_id=record_id, action_log_id=action_log_id, action=func, account_id=account_id, extra_args=extra_args) # Not ignoring soft-deleted objects here because if you, say, # delete a draft, we still need to access the object to delete it # on the remote. try: with session_scope(ignore_soft_deletes=False) as db_session: if extra_args: func(account_id, record_id, db_session, extra_args) else: func(account_id, record_id, db_session) action_log_entry = db_session.query(ActionLog).get( action_log_id) action_log_entry.status = 'successful' db_session.commit() latency = round((datetime.utcnow() - action_log_entry.created_at).total_seconds(), 2) log.info('syncback action completed', action_id=action_log_id, latency=latency) syncback_service.remove_from_schedule(action_log_id) except Exception as e: # To reduce error-reporting noise, don't ship to Sentry # if not actionable. if isinstance(e, ProviderSpecificException): log.warning('Uncaught error', exc_info=True) else: log_uncaught_errors(log, account_id=account_id) with session_scope() as db_session: action_log_entry = db_session.query(ActionLog).get( action_log_id) action_log_entry.retries += 1 if action_log_entry.retries == ACTION_MAX_NR_OF_RETRIES: log.critical('Max retries reached, giving up.', action_id=action_log_id, account_id=account_id, exc_info=True) action_log_entry.status = 'failed' db_session.commit() # Wait for a bit before retrying gevent.sleep(retry_interval) # Remove the entry from the scheduled set so that it can be # retried or given up on. syncback_service.remove_from_schedule(action_log_id) # Again, don't raise on exceptions that require # provider-specific handling e.g. EAS if not isinstance(e, ProviderSpecificException): raise
def _smtp_oauth2_try_refresh(self): with session_scope() as db_session: account = db_session.query(ImapAccount).get(self.account_id) self.auth_token = account.renew_access_token()
def sync_deltas():
    g.parser.add_argument('cursor', type=valid_public_id, location='args',
                          required=True)
    g.parser.add_argument('exclude_types', type=valid_delta_object_types,
                          location='args')
    g.parser.add_argument('include_types', type=valid_delta_object_types,
                          location='args')
    g.parser.add_argument('timeout', type=int,
                          default=LONG_POLL_REQUEST_TIMEOUT, location='args')
    # TODO(emfree): should support `expand` parameter in delta endpoints.
    args = strict_parse_args(g.parser, request.args)
    exclude_types = args.get('exclude_types')
    include_types = args.get('include_types')
    cursor = args['cursor']
    timeout = args['timeout']

    if include_types and exclude_types:
        return err(400, "Invalid Request. Cannot specify both include_types "
                        "and exclude_types")

    if cursor == '0':
        start_pointer = 0
    else:
        try:
            start_pointer, = g.db_session.query(Transaction.id). \
                filter(Transaction.public_id == cursor,
                       Transaction.namespace_id == g.namespace.id).one()
        except NoResultFound:
            raise InputError('Invalid cursor parameter')

    # The client wants us to wait until there are changes
    g.db_session.close()  # hack to close the flask session
    poll_interval = 1

    start_time = time.time()
    while time.time() - start_time < timeout:
        with session_scope() as db_session:
            deltas, _ = delta_sync.format_transactions_after_pointer(
                g.namespace, start_pointer, db_session, args['limit'],
                exclude_types, include_types)

        response = {
            'cursor_start': cursor,
            'deltas': deltas,
        }
        if deltas:
            response['cursor_end'] = deltas[-1]['cursor']
            return g.encoder.jsonify(response)
        # No changes yet: keep waiting if this is the longpoll endpoint,
        # otherwise return immediately.
        elif '/delta/longpoll' in request.url_rule.rule:
            gevent.sleep(poll_interval)
        else:
            response['cursor_end'] = cursor
            return g.encoder.jsonify(response)

    # If nothing happens before the timeout, just return the cursor we were
    # given as the end cursor.
    response['cursor_end'] = cursor
    return g.encoder.jsonify(response)
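# Seen from a client, the longpoll variant of this endpoint is a GET that
# blocks until deltas appear or the timeout expires. A hedged sketch using
# `requests`; the API root and the basic-auth-with-token convention are
# assumptions for illustration, not guaranteed by the handler above.
import requests

API_ROOT = 'https://api.example.com'  # assumption: deployment-specific


def follow_deltas_sketch(token, cursor, timeout=120):
    while True:
        resp = requests.get(API_ROOT + '/delta/longpoll',
                            params={'cursor': cursor, 'timeout': timeout},
                            auth=(token, ''))
        resp.raise_for_status()
        body = resp.json()
        for delta in body.get('deltas', []):
            yield delta
        # cursor_end is the last delta's cursor, or the request cursor when
        # nothing changed before the timeout.
        cursor = body['cursor_end']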
def upgrade(): from inbox.models.session import session_scope from inbox.models.folder import Folder from inbox.sqlalchemy_ext.util import JSON from inbox.ignition import main_engine engine = main_engine(pool_size=1, max_overflow=0) ### foldersync => imapfoldersyncstatus # note that renaming a table does in fact migrate constraints + indexes too op.rename_table('foldersync', 'imapfoldersyncstatus') op.alter_column('imapfoldersyncstatus', '_sync_status', existing_type=JSON(), nullable=True, new_column_name='_metrics') op.add_column('imapfoldersyncstatus', sa.Column('folder_id', sa.Integer(), nullable=False)) ### uidvalidity => imapfolderinfo op.rename_table('uidvalidity', 'imapfolderinfo') op.alter_column('imapfolderinfo', 'uid_validity', existing_type=sa.Integer(), nullable=False, new_column_name='uidvalidity') op.alter_column('imapfolderinfo', 'highestmodseq', existing_type=sa.Integer(), nullable=True) op.drop_constraint('imapfolderinfo_ibfk_1', 'imapfolderinfo', type_='foreignkey') op.alter_column('imapfolderinfo', 'imapaccount_id', existing_type=sa.Integer(), nullable=False, new_column_name='account_id') op.create_foreign_key('imapfolderinfo_ibfk_1', 'imapfolderinfo', 'imapaccount', ['account_id'], ['id']) op.add_column('imapfolderinfo', sa.Column('folder_id', sa.Integer(), nullable=False)) ### imapuid op.drop_constraint('imapuid_ibfk_1', 'imapuid', type_='foreignkey') op.alter_column('imapuid', 'imapaccount_id', existing_type=sa.Integer(), nullable=False, new_column_name='account_id') op.create_foreign_key('imapuid_ibfk_1', 'imapuid', 'imapaccount', ['account_id'], ['id']) ### migrate data and add new constraints Base = sa.ext.declarative.declarative_base() Base.metadata.reflect(engine) if 'easfoldersync' in Base.metadata.tables: op.rename_table('easfoldersync', 'easfoldersyncstatus') op.add_column('easfoldersyncstatus', sa.Column('folder_id', sa.Integer(), nullable=False)) op.alter_column('easfoldersyncstatus', '_sync_status', existing_type=JSON(), nullable=True, new_column_name='_metrics') Base.metadata.reflect(engine) class EASFolderSyncStatus(Base): __table__ = Base.metadata.tables['easfoldersyncstatus'] class ImapFolderSyncStatus(Base): __table__ = Base.metadata.tables['imapfoldersyncstatus'] class ImapFolderInfo(Base): __table__ = Base.metadata.tables['imapfolderinfo'] with session_scope(versioned=False, ignore_soft_deletes=False) \ as db_session: folder_id_for = dict([((account_id, name.lower()), id_) for id_, account_id, name in db_session.query( Folder.id, Folder.account_id, Folder.name)]) for status in db_session.query(ImapFolderSyncStatus): print "migrating", status.folder_name status.folder_id = folder_id_for[(status.account_id, status.folder_name.lower())] db_session.commit() if 'easfoldersyncstatus' in Base.metadata.tables: for status in db_session.query(EASFolderSyncStatus): print "migrating", status.folder_name folder_id = folder_id_for.get( (status.account_id, status.folder_name.lower())) if folder_id is not None: status.folder_id = folder_id else: # EAS folder rows *may* not exist if have no messages folder = Folder(account_id=status.account_id, name=status.folder_name) db_session.add(folder) db_session.commit() status.folder_id = folder.id db_session.commit() # some weird alembic bug? 
        # need to drop and recreate this FK
        op.drop_constraint('easfoldersyncstatus_ibfk_1',
                           'easfoldersyncstatus', type_='foreignkey')
        op.drop_column('easfoldersyncstatus', 'folder_name')
        op.create_foreign_key('easfoldersyncstatus_ibfk_1',
                              'easfoldersyncstatus', 'easaccount',
                              ['account_id'], ['id'])
        op.create_foreign_key('easfoldersyncstatus_ibfk_2',
                              'easfoldersyncstatus', 'folder',
                              ['folder_id'], ['id'])
        op.create_unique_constraint('account_id', 'easfoldersyncstatus',
                                    ['account_id', 'folder_id'])

    # some weird alembic bug? need to drop and recreate this FK
    op.drop_constraint('imapfoldersyncstatus_ibfk_1', 'imapfoldersyncstatus',
                       type_='foreignkey')
    op.drop_constraint('account_id', 'imapfoldersyncstatus', type_='unique')
    op.drop_column('imapfoldersyncstatus', 'folder_name')
    op.create_foreign_key('imapfoldersyncstatus_ibfk_1',
                          'imapfoldersyncstatus', 'imapaccount',
                          ['account_id'], ['id'])
    op.create_foreign_key('imapfoldersyncstatus_ibfk_2',
                          'imapfoldersyncstatus', 'folder',
                          ['folder_id'], ['id'])
    op.create_unique_constraint('account_id', 'imapfoldersyncstatus',
                                ['account_id', 'folder_id'])

    with session_scope(versioned=False, ignore_soft_deletes=False) \
            as db_session:
        for info in db_session.query(ImapFolderInfo):
            print "migrating", info.folder_name
            info.folder_id = folder_id_for[(info.account_id,
                                            info.folder_name.lower())]
        db_session.commit()

    # some weird alembic bug? need to drop and recreate this FK
    op.drop_constraint('imapfolderinfo_ibfk_1', 'imapfolderinfo',
                       type_='foreignkey')
    op.drop_constraint('imapaccount_id', 'imapfolderinfo', type_='unique')
    op.drop_column('imapfolderinfo', 'folder_name')
    op.create_foreign_key('imapfolderinfo_ibfk_1', 'imapfolderinfo',
                          'imapaccount', ['account_id'], ['id'])
    op.create_foreign_key('imapfolderinfo_ibfk_2', 'imapfolderinfo',
                          'folder', ['folder_id'], ['id'])
    op.create_unique_constraint('imapaccount_id', 'imapfolderinfo',
                                ['account_id', 'folder_id'])
def upgrade(): import datetime from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import relationship from inbox.config import config from inbox.models.session import session_scope from inbox.ignition import main_engine engine = main_engine() now = datetime.datetime.now() Base = declarative_base() Base.metadata.reflect(engine) class GmailAccount(Base): __table__ = Base.metadata.tables['gmailaccount'] class Secret(Base): __table__ = Base.metadata.tables['secret'] class GmailAuthCredentials(Base): __table__ = Base.metadata.tables['gmailauthcredentials'] secret = relationship(Secret) with session_scope(versioned=False) as db_session: for acc, sec in db_session.query(GmailAccount, Secret) \ .filter(GmailAccount.refresh_token_id == Secret.id, GmailAccount.scope != None, GmailAccount.g_id_token != None) \ .all(): # Create a new GmailAuthCredentials entry if # we don't have one already if db_session.query(GmailAuthCredentials, Secret) \ .filter(GmailAuthCredentials.gmailaccount_id == acc.id) \ .filter(Secret._secret == sec._secret) \ .count() == 0: # Create a new secret new_sec = Secret() new_sec.created_at = now new_sec.updated_at = now new_sec._secret = sec._secret new_sec.type = sec.type # 'token' new_sec.encryption_scheme = sec.encryption_scheme # Create a new GmailAuthCredentials entry auth_creds = GmailAuthCredentials() auth_creds.gmailaccount_id = acc.id auth_creds.scopes = acc.scope auth_creds.g_id_token = acc.g_id_token auth_creds.created_at = now auth_creds.updated_at = now auth_creds.secret = new_sec auth_creds.client_id = \ (acc.client_id or config.get_required('GOOGLE_OAUTH_CLIENT_ID')) auth_creds.client_secret = \ (acc.client_secret or config.get_required('GOOGLE_OAUTH_CLIENT_SECRET')) db_session.add(auth_creds) db_session.add(new_sec) db_session.commit()
def populate(): # Populate new classes from the existing data from inbox.models.event import (Event, RecurringEvent, RecurringEventOverride) from inbox.models.session import session_scope from inbox.events.util import parse_datetime from inbox.events.recurring import link_events with session_scope() as db: # Redo recurrence rule population, since we extended the column length print "Repopulating max-length recurrences...", for e in db.query(Event).filter( sa.func.length(Event.recurrence) > 250): try: raw_data = json.loads(e.raw_data) except: try: raw_data = ast.literal_eval(e.raw_data) except: print "Could not load raw data for event {}".format(e.id) continue e.recurrence = raw_data['recurrence'] db.commit() print "done." print "Updating types for Override...", # Slightly hacky way to convert types (only needed for one-off import) convert = """UPDATE event SET type='recurringeventoverride' WHERE raw_data LIKE '%recurringEventId%'""" db.execute(convert) create = """INSERT INTO recurringeventoverride (id) SELECT id FROM event WHERE type='recurringeventoverride' AND id NOT IN (SELECT id FROM recurringeventoverride)""" try: db.execute(create) except Exception as e: print "Couldn't insert RecurringEventOverrides: {}".format(e) exit(2) print "done." c = 0 print "Expanding Overrides .", query = db.query(RecurringEventOverride) for e in query: try: # Some raw data is str(dict), other is json.dumps raw_data = json.loads(e.raw_data) except: try: raw_data = ast.literal_eval(e.raw_data) except: print "Could not load raw data for event {}".format(e.id) continue rec_uid = raw_data.get('recurringEventId') if rec_uid: e.master_event_uid = rec_uid ost = raw_data.get('originalStartTime') if ost: # this is a dictionary with one value start_time = ost.values().pop() e.original_start_time = parse_datetime(start_time) # attempt to get the ID for the event, if we can, and # set the relationship appropriately if raw_data.get('status') == 'cancelled': e.cancelled = True link_events(db, e) c += 1 if c % 100 == 0: print ".", sys.stdout.flush() db.commit() print "done. ({} modified)".format(c) # Convert Event to RecurringEvent print "Updating types for RecurringEvent...", convert = """UPDATE event SET type='recurringevent' WHERE recurrence IS NOT NULL""" db.execute(convert) create = """INSERT INTO recurringevent (id) SELECT id FROM event WHERE type='recurringevent' AND id NOT IN (SELECT id FROM recurringevent)""" try: db.execute(create) except Exception as e: print "Couldn't insert RecurringEvents: {}".format(e) exit(2) print "done." # Pull out recurrence metadata from recurrence c = 0 print "Expanding master events .", query = db.query(RecurringEvent) for r in query: r.unwrap_rrule() try: raw_data = json.loads(r.raw_data) except: try: raw_data = ast.literal_eval(r.raw_data) except: print "Could not load raw data for event {}".format(r.id) continue r.start_timezone = raw_data['start'].get('timeZone') # find any un-found overrides that didn't have masters earlier link_events(db, r) db.add(r) c += 1 if c % 100 == 0: print ".", sys.stdout.flush() db.commit() print "done. ({} modified)".format(c) # Finally, convert all remaining Events to type='event' convert = """UPDATE event SET type='event' WHERE type IS NULL""" db.execute(convert)
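# The recurrence strings repopulated above are RFC 5545 RRULEs. This is not
# what RecurringEvent.unwrap_rrule() does internally, just an illustration of
# expanding such a rule with python-dateutil:
from datetime import datetime
from dateutil.rrule import rrulestr

rule = rrulestr("RRULE:FREQ=WEEKLY;BYDAY=MO;COUNT=3",
                dtstart=datetime(2015, 3, 2, 9, 0))
print list(rule)
# -> occurrences on 2015-03-02, 2015-03-09 and 2015-03-16 at 09:00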
def start_sync(self, account_id):
    """
    Starts a sync for the account with the given account_id.
    If that account doesn't exist, does nothing.
    """
    with self.semaphore, session_scope(account_id) as db_session:
        acc = db_session.query(Account).with_for_update().get(account_id)
        if acc is None:
            self.log.error("no such account", account_id=account_id)
            return False
        if not acc.sync_should_run:
            return False
        if (acc.desired_sync_host is not None and
                acc.desired_sync_host != self.process_identifier):
            return False
        if (acc.sync_host is not None and
                acc.sync_host != self.process_identifier):
            return False
        self.log.info("starting sync", account_id=acc.id,
                      email_address=acc.email_address)

        if acc.id in self.syncing_accounts:
            self.log.info("sync already started", account_id=account_id)
            return False

        try:
            acc.sync_host = self.process_identifier
            if acc.sync_email:
                monitor = self.monitor_cls_for[acc.provider](acc)
                self.email_sync_monitors[acc.id] = monitor
                monitor.start()

            info = acc.provider_info
            if info.get("contacts", None) and acc.sync_contacts:
                contact_sync = ContactSync(
                    acc.email_address,
                    acc.verbose_provider,
                    acc.id,
                    acc.namespace.id,
                )
                self.contact_sync_monitors[acc.id] = contact_sync
                contact_sync.start()

            if info.get("events", None) and acc.sync_events:
                if USE_GOOGLE_PUSH_NOTIFICATIONS and acc.provider == "gmail":
                    event_sync = GoogleEventSync(
                        acc.email_address,
                        acc.verbose_provider,
                        acc.id,
                        acc.namespace.id,
                    )
                else:
                    event_sync = EventSync(
                        acc.email_address,
                        acc.verbose_provider,
                        acc.id,
                        acc.namespace.id,
                    )
                self.event_sync_monitors[acc.id] = event_sync
                event_sync.start()

            acc.sync_started()
            self.syncing_accounts.add(acc.id)
            # TODO (mark): Uncomment this after we've transitioned from
            # statsd to brubeck
            # statsd_client.gauge('mailsync.sync_hosts_counts.{}'.format(acc.id), 1, delta=True)
            db_session.commit()
            self.log.info("Sync started", account_id=account_id,
                          sync_host=acc.sync_host)
        except Exception:
            self.log.error("Error starting sync", exc_info=True,
                           account_id=account_id)
            return False

    return True
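# start_sync() relies on SELECT ... FOR UPDATE (via with_for_update()) so two
# sync processes can't claim the same account concurrently. A minimal sketch
# of that claim pattern with a plain SQLAlchemy 1.x session; `Session` and
# `Account` are assumed to be configured elsewhere and the helper name is
# illustrative.
def claim_account_sketch(Session, Account, account_id, process_identifier):
    session = Session()
    try:
        # Locks the account row until commit/rollback.
        acc = session.query(Account).with_for_update().get(account_id)
        if acc is None or acc.sync_host is not None:
            session.rollback()
            return False
        acc.sync_host = process_identifier
        session.commit()
        return True
    finally:
        session.close()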