def test_kill_device_lastone(store):
    # If we kill a device and it's the only device publishing heartbeats for
    # that folder, the folder is removed when the device is removed.
    proxy_for(1, 2, device_id=2)
    clear_heartbeat_status(1, device_id=2)
    folders = store.get_account_folders(1)
    assert len(folders) == 0

def test_kill_device_lastone(store):
    # If we kill a device and it's the only device publishing heartbeats for
    # that folder, the folder is removed when the device is removed.
    proxy_for(1, 2, device_id=2).publish()
    clear_heartbeat_status(1, device_id=2)
    folders = store.get_account_folders(1)
    assert len(folders) == 0

def stop_sync(self, account_id):
    """
    Stops the sync for the account with the given account_id.
    If that account doesn't exist, does nothing.

    """
    with self.semaphore:
        self.log.info('Stopping monitors', account_id=account_id)
        if account_id in self.email_sync_monitors:
            self.email_sync_monitors[account_id].kill()
            del self.email_sync_monitors[account_id]

        # Stop contacts sync if necessary
        if account_id in self.contact_sync_monitors:
            self.contact_sync_monitors[account_id].kill()
            del self.contact_sync_monitors[account_id]

        # Stop events sync if necessary
        if account_id in self.event_sync_monitors:
            self.event_sync_monitors[account_id].kill()
            del self.event_sync_monitors[account_id]

        self.syncing_accounts.discard(account_id)

        # Update database/heartbeat state
        with session_scope(account_id) as db_session:
            acc = db_session.query(Account).get(account_id)
            if not acc.sync_should_run:
                clear_heartbeat_status(acc.id)
            if acc.sync_stopped(self.process_identifier):
                self.log.info('sync stopped', account_id=account_id)

        r = self.queue_client.unassign(account_id, self.process_identifier)
        return r

def _run_impl(self):
    try:
        self.provider_instance = self.provider(self.account_id,
                                               self.namespace_id)
        while True:
            # Check to see if this greenlet should exit
            if self.shutdown.is_set():
                clear_heartbeat_status(self.account_id, self.folder_id)
                return False

            try:
                self.poll()
                self.heartbeat_status.publish(state='poll')

            # If we get a connection or API permissions error, then sleep
            # 2x poll frequency.
            except ConnectionError:
                self.log.error('Error while polling', exc_info=True)
                self.heartbeat_status.publish(state='poll error')
                gevent.sleep(self.poll_frequency)

            gevent.sleep(self.poll_frequency)

    except ValidationError:
        # Bad account credentials; exit.
        self.log.error('Error while establishing the connection',
                       exc_info=True)
        return False

def test_kill_device_multiple(store):
    # If we kill a device and the folder has multiple devices, don't clear
    # the heartbeat status
    proxy_for(1, 2, device_id=2)
    proxy_for(1, 2, device_id=3)
    clear_heartbeat_status(1, device_id=2)
    folders = store.get_account_folders(1)
    (f, ts) = folders[0]
    assert f == '2'

def test_kill_device_multiple(store):
    # If we kill a device and the folder has multiple devices, don't clear
    # the heartbeat status
    proxy_for(1, 2, device_id=2).publish()
    proxy_for(1, 2, device_id=3).publish()
    clear_heartbeat_status(1, device_id=2)
    folders = store.get_account_folders(1)
    (f, ts) = folders[0]
    assert f == "2"

def delete_marked_accounts(shard_id, throttle=False, dry_run=False):
    start = time.time()
    deleted_count = 0
    ids_to_delete = []

    with session_scope_by_shard_id(shard_id) as db_session:
        ids_to_delete = [(acc.id, acc.namespace.id)
                         for acc in db_session.query(Account)
                         if acc.is_deleted]

    queue_size = len(ids_to_delete)
    for account_id, namespace_id in ids_to_delete:
        # queue_size = length of queue
        # deleted_count = number of accounts deleted during loop iteration
        # this is necessary because the length of ids_to_delete doesn't
        # change during loop iteration
        statsd_client.gauge('mailsync.{}.account_deletion.queue.length'
                            .format(shard_id),
                            queue_size - deleted_count)
        try:
            with session_scope(namespace_id) as db_session:
                account = db_session.query(Account).get(account_id)
                if not account:
                    log.critical('Account does not exist',
                                 account_id=account_id)
                    continue

                if account.sync_should_run or not account.is_deleted:
                    log.warn('Account NOT marked for deletion. '
                             'Will not delete', account_id=account_id)
                    continue

            log.info('Deleting account', account_id=account_id)
            start_time = time.time()
            # Delete data in database
            try:
                log.info('Deleting database data', account_id=account_id)
                delete_namespace(account_id, namespace_id, throttle=throttle,
                                 dry_run=dry_run)
            except Exception as e:
                log.critical('Database data deletion failed', error=e,
                             account_id=account_id)
                continue

            # Delete liveness data
            log.debug('Deleting liveness data', account_id=account_id)
            clear_heartbeat_status(account_id)

            deleted_count += 1
            statsd_client.incr('mailsync.account_deletion.queue.deleted', 1)
            statsd_client.timing('mailsync.account_deletion.queue.deleted',
                                 time.time() - start_time)
        except Exception:
            log_uncaught_errors(log, account_id=account_id)

    end = time.time()
    log.info('All data deleted successfully', shard_id=shard_id,
             time=end - start, count=deleted_count)

def delete_marked_accounts(shard_id, throttle=False, dry_run=False):
    start = time.time()
    deleted_count = 0
    ids_to_delete = []

    with session_scope_by_shard_id(shard_id) as db_session:
        ids_to_delete = [(acc.id, acc.namespace.id)
                         for acc in db_session.query(Account)
                         if acc.is_deleted]

    for account_id, namespace_id in ids_to_delete:
        try:
            with session_scope(namespace_id) as db_session:
                account = db_session.query(Account).get(account_id)
                if not account:
                    log.critical('Account does not exist',
                                 account_id=account_id)
                    continue

                if account.sync_should_run or not account.is_deleted:
                    log.warn('Account NOT marked for deletion. '
                             'Will not delete', account_id=account_id)
                    continue

            log.info('Deleting account', account_id=account_id)
            start_time = time.time()
            # Delete data in database
            try:
                log.info('Deleting database data', account_id=account_id)
                delete_namespace(account_id, namespace_id, throttle=throttle,
                                 dry_run=dry_run)
            except Exception as e:
                log.critical('Database data deletion failed', error=e,
                             account_id=account_id)
                continue

            # Delete liveness data
            log.debug('Deleting liveness data', account_id=account_id)
            clear_heartbeat_status(account_id)

            deleted_count += 1
            statsd_client.timing('mailsync.account_deletion.queue.deleted',
                                 time.time() - start_time)
        except Exception:
            log_uncaught_errors(log, account_id=account_id)

    end = time.time()
    log.info('All data deleted successfully', shard_id=shard_id,
             time=end - start, count=deleted_count)

def delete_account_data(account_id, dry_run, yes, throttle):
    maybe_enable_rollbar()

    with session_scope(account_id) as db_session:
        account = db_session.query(Account).get(account_id)

        if not account:
            print("Account with id {} does NOT exist.".format(account_id))
            return

        email_address = account.email_address
        namespace_id = account.namespace.id

        if account.sync_should_run or not account.is_marked_for_deletion:
            print("Account with id {} NOT marked for deletion.\n"
                  "Will NOT delete, goodbye.".format(account_id))
            return -1

    if not yes:
        question = (
            "Are you sure you want to delete all data for account with "
            "id: {}, email_address: {} and namespace_id: {}? [yes / no]"
            .format(account_id, email_address, namespace_id))
        answer = raw_input(question).strip().lower()
        if answer != "yes":
            print("Will NOT delete, goodbye.")
            return 0

    print("Deleting account with id: {}...".format(account_id))
    start = time.time()

    # Delete data in database
    try:
        print("Deleting database data")
        delete_namespace(namespace_id, dry_run=dry_run, throttle=throttle)
    except Exception as e:
        print("Database data deletion failed! Error: {}".format(str(e)))
        return -1

    database_end = time.time()
    print("Database data deleted. Time taken: {}"
          .format(database_end - start))

    # Delete liveness data
    print("Deleting liveness data")
    clear_heartbeat_status(account_id)

    end = time.time()
    print("All data deleted successfully! TOTAL time taken: {}"
          .format(end - start))
    return 0

def test_kill_device_multiple():
    # If we kill a device and the folder has multiple devices, don't clear
    # the heartbeat status
    local_store = HeartbeatStore().store()

    proxy_for(1, 2, device_id=2).publish()
    proxy_for(1, 2, device_id=3).publish()
    clear_heartbeat_status(1, device_id=2)

    folders = local_store.get_account_folders(1)
    assert len(folders) == 1
    f, ts = folders[0]
    assert f == '2'

def test_kill_device_multiple():
    # If we kill a device and the folder has multiple devices, don't clear
    # the heartbeat status
    local_store = HeartbeatStore().store()

    proxy_for(1, 2, device_id=2).publish()
    proxy_for(1, 2, device_id=3).publish()
    clear_heartbeat_status(1, device_id=2)

    folders = local_store.get_account_folders(1)
    assert len(folders) == 1
    f, ts = folders[0]
    assert f.decode() == "2"

def start_new_folder_sync_engines(self, folders=set()):
    new_folders = [f for f in self.prepare_sync() if f not in folders]
    for folder_name, folder_id in new_folders:
        log.info(
            "Folder sync engine started",
            account_id=self.account_id,
            folder_id=folder_id,
            folder_name=folder_name,
        )
        thread = self.sync_engine_class(
            self.account_id,
            folder_name,
            folder_id,
            self.email_address,
            self.provider_name,
            self.poll_frequency,
            self.syncmanager_lock,
            self.refresh_flags_max,
            self.retry_fail_classes,
        )
        self.folder_monitors.start(thread)
        while (
            not thread_polling(thread)
            and not thread_finished(thread)
            and not thread.ready()
        ):
            sleep(self.heartbeat)

        # allow individual folder sync monitors to shut themselves down
        # after completing the initial sync
        if thread_finished(thread) or thread.ready():
            if thread.exception:
                # Exceptions causing the folder sync to exit should not
                # clear the heartbeat.
                log.info(
                    "Folder sync engine exited with error",
                    account_id=self.account_id,
                    folder_id=folder_id,
                    folder_name=folder_name,
                    error=thread.exception,
                )
            else:
                log.info(
                    "Folder sync engine finished",
                    account_id=self.account_id,
                    folder_id=folder_id,
                    folder_name=folder_name,
                )
                # clear the heartbeat for this folder-thread since it
                # exited cleanly.
                clear_heartbeat_status(self.account_id, folder_id)
            # note: thread is automatically removed from
            # self.folder_monitors
        else:
            folders.add((folder_name, folder_id))

def delete_marked_accounts(shard_id, throttle=False, dry_run=False):
    start = time.time()
    deleted_count = 0
    ids_to_delete = []

    with session_scope_by_shard_id(shard_id) as db_session:
        ids_to_delete = [(acc.id, acc.namespace.id)
                         for acc in db_session.query(Account)
                         if acc.is_deleted]

    for account_id, namespace_id in ids_to_delete:
        try:
            with session_scope(namespace_id) as db_session:
                account = db_session.query(Account).get(account_id)
                if not account:
                    log.critical('Account does not exist',
                                 account_id=account_id)
                    continue

                if account.sync_should_run or not account.is_deleted:
                    log.warn('Account NOT marked for deletion. '
                             'Will not delete', account_id=account_id)
                    continue

            log.info('Deleting account', account_id=account_id)
            start_time = time.time()
            # Delete data in database
            try:
                log.info('Deleting database data', account_id=account_id)
                delete_namespace(account_id, namespace_id, throttle=throttle,
                                 dry_run=dry_run)
            except Exception as e:
                log.critical('Database data deletion failed', error=e,
                             account_id=account_id)
                continue

            # Delete liveness data
            log.debug('Deleting liveness data', account_id=account_id)
            clear_heartbeat_status(account_id)

            deleted_count += 1
            statsd_client.timing('mailsync.account_deletion.queue.deleted',
                                 time.time() - start_time)
            gevent.sleep(60)
        except Exception:
            log_uncaught_errors(log, account_id=account_id)

    end = time.time()
    log.info('All data deleted successfully', shard_id=shard_id,
             time=end - start, count=deleted_count)

def main(host, port, account_id, folder_id, device_id):
    maybe_enable_rollbar()

    print("Clearing heartbeat status...")
    n = clear_heartbeat_status(account_id, folder_id, device_id, host, port)
    print("{} folders cleared.".format(n))
    exit(0)

def start_new_folder_sync_engines(self, folders=set()):
    new_folders = [f for f in self.prepare_sync() if f not in folders]
    for folder_name, folder_id in new_folders:
        log.info('Folder sync engine started',
                 account_id=self.account_id,
                 folder_id=folder_id,
                 folder_name=folder_name)
        thread = self.sync_engine_class(
            self.account_id, folder_name, folder_id, self.email_address,
            self.provider_name, self.poll_frequency, self.syncmanager_lock,
            self.refresh_flags_max, self.retry_fail_classes)
        self.folder_monitors.start(thread)
        while not thread_polling(thread) and \
                not thread_finished(thread) and \
                not thread.ready():
            sleep(self.heartbeat)

        # allow individual folder sync monitors to shut themselves down
        # after completing the initial sync
        if thread_finished(thread) or thread.ready():
            if thread.exception:
                # Exceptions causing the folder sync to exit should not
                # clear the heartbeat.
                log.info('Folder sync engine exited with error',
                         account_id=self.account_id,
                         folder_id=folder_id,
                         folder_name=folder_name,
                         error=thread.exception)
            else:
                log.info('Folder sync engine finished',
                         account_id=self.account_id,
                         folder_id=folder_id,
                         folder_name=folder_name)
                # clear the heartbeat for this folder-thread since it
                # exited cleanly.
                clear_heartbeat_status(self.account_id, folder_id)
            # note: thread is automatically removed from
            # self.folder_monitors
        else:
            folders.add((folder_name, folder_id))

def stop_sync(self, account_id):
    """
    Stops the sync for the account with the given account_id.
    If that account doesn't exist, does nothing.

    """
    with self.semaphore:
        self.log.info('Stopping monitors', account_id=account_id)
        if account_id in self.email_sync_monitors:
            self.email_sync_monitors[account_id].kill()
            del self.email_sync_monitors[account_id]

        # Stop contacts sync if necessary
        if account_id in self.contact_sync_monitors:
            self.contact_sync_monitors[account_id].kill()
            del self.contact_sync_monitors[account_id]

        # Stop events sync if necessary
        if account_id in self.event_sync_monitors:
            self.event_sync_monitors[account_id].kill()
            del self.event_sync_monitors[account_id]

        # Update database/heartbeat state
        with session_scope(account_id) as db_session:
            acc = db_session.query(Account).get(account_id)
            if not acc.sync_should_run:
                clear_heartbeat_status(acc.id)
            if not acc.sync_stopped(self.process_identifier):
                self.syncing_accounts.discard(account_id)
                return False
            self.log.info('sync stopped', account_id=account_id)
            # TODO (mark): Uncomment this after we've transitioned from
            # statsd to brubeck
            # statsd_client.gauge('mailsync.sync_hosts_counts.{}'.format(acc.id), -1, delta=True)
            db_session.commit()
            self.syncing_accounts.discard(account_id)
            return True

def delete_marked_accounts(shard_id, ids_to_delete, throttle=False,
                           dry_run=False):
    start = time.time()
    deleted_count = 0

    for account_id, namespace_id in ids_to_delete:
        try:
            with session_scope(namespace_id) as db_session:
                account = db_session.query(Account).get(account_id)
                if not account:
                    log.critical("Account does not exist",
                                 account_id=account_id)
                    continue

                if account.sync_should_run or not account.is_deleted:
                    log.warn("Account NOT marked for deletion. "
                             "Will not delete", account_id=account_id)
                    continue

            log.info("Deleting account", account_id=account_id)
            start_time = time.time()
            # Delete data in database
            try:
                log.info("Deleting database data", account_id=account_id)
                delete_namespace(account_id, namespace_id, throttle=throttle,
                                 dry_run=dry_run)
            except Exception as e:
                log.critical("Database data deletion failed", error=e,
                             account_id=account_id)
                continue

            # Delete liveness data
            log.debug("Deleting liveness data", account_id=account_id)
            clear_heartbeat_status(account_id)

            deleted_count += 1
            statsd_client.timing("mailsync.account_deletion.queue.deleted",
                                 time.time() - start_time)
        except Exception:
            log_uncaught_errors(log, account_id=account_id)

    end = time.time()
    log.info("All data deleted successfully", shard_id=shard_id,
             time=end - start, count=deleted_count)

def stop_sync(self, account_id):
    """
    Stops the sync for the account with the given account_id.
    If that account doesn't exist, does nothing.

    """
    with self.semaphore:
        self.log.info("Stopping monitors", account_id=account_id)
        if account_id in self.email_sync_monitors:
            self.email_sync_monitors[account_id].kill()
            del self.email_sync_monitors[account_id]

        # Stop contacts sync if necessary
        if account_id in self.contact_sync_monitors:
            self.contact_sync_monitors[account_id].kill()
            del self.contact_sync_monitors[account_id]

        # Stop events sync if necessary
        if account_id in self.event_sync_monitors:
            self.event_sync_monitors[account_id].kill()
            del self.event_sync_monitors[account_id]

        # Update database/heartbeat state
        with session_scope(account_id) as db_session:
            acc = db_session.query(Account).get(account_id)
            if not acc.sync_should_run:
                clear_heartbeat_status(acc.id)
            if not acc.sync_stopped(self.process_identifier):
                self.syncing_accounts.discard(account_id)
                return False
            self.log.info("sync stopped", account_id=account_id)
            # TODO (mark): Uncomment this after we've transitioned from
            # statsd to brubeck
            # statsd_client.gauge('mailsync.sync_hosts_counts.{}'.format(acc.id), -1, delta=True)
            db_session.commit()
            self.syncing_accounts.discard(account_id)
            return True

def save_folder_names(log, account_id, folder_names, db_session):
    """
    Create Folder objects & map special folder names on Account objects.

    Folders that belong to an account and no longer exist in `folder_names`
    ARE DELETED, unless they are "dangling" (do not have a 'name' set).

    We don't canonicalize folder names to lowercase when saving because
    different backends may or may not be case-sensitive. Code that references
    saved folder names should canonicalize if needed when doing comparisons.

    """
    account = db_session.query(Account).get(account_id)
    assert 'inbox' in folder_names, 'Account {} has no detected inbox folder'\
        .format(account.email_address)

    all_folders = db_session.query(Folder).filter_by(
        account_id=account.id).all()
    # dangled_folders don't map to upstream account folders (may be used for
    # keeping track of e.g. special Gmail labels which are exposed as IMAP
    # flags but not folders)
    folder_for = {f.name: f for f in all_folders if f.name is not None}
    dangled_folder_for = {
        f.canonical_name: f for f in all_folders if f.name is None
    }

    canonical_names = {
        'inbox', 'drafts', 'sent', 'spam', 'trash', 'starred', 'important',
        'archive', 'all'
    }
    for canonical_name in canonical_names:
        if canonical_name in folder_names:
            backend_folder_name = folder_names[canonical_name]
            if backend_folder_name not in folder_for:
                # Reconcile dangled folders which now exist on the remote
                if canonical_name in dangled_folder_for:
                    folder = dangled_folder_for[canonical_name]
                    folder.name = folder_names[canonical_name]
                    del dangled_folder_for[canonical_name]
                else:
                    folder = Folder.find_or_create(db_session, account, None,
                                                   canonical_name)
                    if folder.name != folder_names[canonical_name]:
                        if folder.name is not None:
                            del folder_for[folder.name]
                        folder.name = folder_names[canonical_name]
                folder.get_associated_tag(db_session)
                attr_name = '{}_folder'.format(canonical_name)
                id_attr_name = '{}_folder_id'.format(canonical_name)
                if getattr(account, id_attr_name) != folder.id:
                    # NOTE: updating the relationship (i.e., attr_name) also
                    # updates the associated foreign key (i.e., id_attr_name)
                    setattr(account, attr_name, folder)
            else:
                del folder_for[backend_folder_name]

    # Gmail labels, user-created IMAP/EAS folders, etc.
    if 'extra' in folder_names:
        for name in folder_names['extra']:
            name = name[:MAX_FOLDER_NAME_LENGTH]
            if name not in folder_for:
                # Folder.create() takes care of adding to the session
                folder = Folder.create(account, name, db_session)
                folder.get_associated_tag(db_session)
            else:
                del folder_for[name]

    # This may cascade to FolderItems and ImapUid (ONLY), which is what we
    # want--doing the update here short-circuits us syncing that change later.
    if len(folder_for):
        log.info("folders deleted from remote", folders=folder_for.keys())
        for name, folder in folder_for.iteritems():
            db_session.delete(folder)
            clear_heartbeat_status(account_id, folder.id)
            # TODO(emfree) delete associated tag

    db_session.commit()

def delete_namespace(namespace_id, throttle=False, dry_run=False):
    """
    Delete all the data associated with a namespace from the database.
    USE WITH CAUTION.

    NOTE: This function is only called from bin/delete-account-data.
    It prints to stdout.

    Raises AccountDeletionErrror with message if there are problems
    """

    with session_scope(namespace_id) as db_session:
        try:
            account = (db_session.query(Account).join(Namespace).filter(
                Namespace.id == namespace_id).one())
        except NoResultFound:
            raise AccountDeletionErrror("Could not find account in database")

        if not account.is_marked_for_deletion:
            raise AccountDeletionErrror(
                "Account is_marked_for_deletion is False. "
                "Change this to proceed with deletion.")
        account_id = account.id
        account_discriminator = account.discriminator

    log.info("Deleting account", account_id=account_id)
    start_time = time.time()

    # These filters are used to configure batch deletion in chunks for
    # specific tables that are prone to transaction blocking during
    # large concurrent write volume. See _batch_delete
    # NOTE: ImapFolderInfo doesn't really fall into this category but
    # we include it here for simplicity anyway.
    filters = OrderedDict()
    for table in [
        "message",
        "block",
        "thread",
        "transaction",
        "actionlog",
        "event",
        "contact",
        "dataprocessingcache",
    ]:
        filters[table] = ("namespace_id", namespace_id)

    if account_discriminator == "easaccount":
        filters["easuid"] = ("easaccount_id", account_id)
        filters["easfoldersyncstatus"] = ("account_id", account_id)
    else:
        filters["imapuid"] = ("account_id", account_id)
        filters["imapfoldersyncstatus"] = ("account_id", account_id)
        filters["imapfolderinfo"] = ("account_id", account_id)

    from inbox.ignition import engine_manager

    # Bypass the ORM for performant bulk deletion;
    # we do /not/ want Transaction records created for these deletions,
    # so this is okay.
    engine = engine_manager.get_for_id(namespace_id)

    for cls in filters:
        _batch_delete(engine, cls, filters[cls], account_id,
                      throttle=throttle, dry_run=dry_run)

    # Use a single delete for the other tables. Rows from tables which contain
    # cascade-deleted foreign keys to other tables deleted here (or above)
    # are also not always explicitly deleted, except where needed for
    # performance.
    #
    # NOTE: Namespace, Account are deleted at the end too.

    query = "DELETE FROM {} WHERE {}={};"

    filters = OrderedDict()
    for table in ("category", "calendar"):
        filters[table] = ("namespace_id", namespace_id)
    for table in ("folder", "label"):
        filters[table] = ("account_id", account_id)
    filters["namespace"] = ("id", namespace_id)

    for table, (column, id_) in iteritems(filters):
        log.info("Performing bulk deletion", table=table)
        start = time.time()

        if throttle:
            bulk_throttle()

        if not dry_run:
            engine.execute(query.format(table, column, id_))
        else:
            log.debug(query.format(table, column, id_))

        end = time.time()
        log.info("Completed bulk deletion", table=table, time=end - start)

    # Delete the account object manually to get rid of the various objects
    # associated with it (e.g: secrets, tokens, etc.)
    with session_scope(account_id) as db_session:
        account = db_session.query(Account).get(account_id)
        if dry_run is False:
            db_session.delete(account)
            db_session.commit()

    # Delete liveness data (heartbeats)
    log.debug("Deleting liveness data", account_id=account_id)
    clear_heartbeat_status(account_id)

    statsd_client.timing("mailsync.account_deletion.queue.deleted",
                         time.time() - start_time)

def start_sync(self, account_id):
    """
    Starts a sync for the account with the given account_id.
    If that account doesn't exist, does nothing.

    """
    with session_scope() as db_session:
        acc = db_session.query(Account).get(account_id)
        if acc is None:
            self.log.error('no such account', account_id=account_id)
            return
        fqdn = platform.node()
        self.log.info('starting sync', account_id=acc.id,
                      email_address=acc.email_address)

        if acc.sync_host is not None and acc.sync_host != fqdn:
            self.log.error(
                'Sync Host Mismatch',
                message='account is syncing on another host {}'.format(
                    acc.sync_host),
                account_id=account_id)

        elif acc.id not in self.monitors:
            # Before starting the sync, clear the heartbeat and individual
            # folder should_run bits. These bits will be flipped to the
            # correct state by the mailsync monitor.
            try:
                for status in acc.foldersyncstatuses:
                    status.sync_should_run = False
            except Exception as e:
                self.log.error('Error resetting folder run status',
                               message=str(e.message), account_id=acc.id)

            try:
                clear_heartbeat_status(acc.id)
            except Exception as e:
                self.log.error('Error clearing heartbeat on sync start',
                               message=str(e.message), account_id=acc.id)

            try:
                if acc.is_sync_locked and acc.is_killed:
                    acc.sync_unlock()
                acc.sync_lock()

                monitor = self.monitor_cls_for[acc.provider](acc)
                self.monitors[acc.id] = monitor
                monitor.start()

                info = acc.provider_info
                if info.get('contacts', None) and acc.sync_contacts:
                    contact_sync = ContactSync(acc.email_address,
                                               acc.provider, acc.id,
                                               acc.namespace.id)
                    self.contact_sync_monitors[acc.id] = contact_sync
                    contact_sync.start()

                if info.get('events', None) and acc.sync_events:
                    event_sync = EventSync(acc.email_address, acc.provider,
                                           acc.id, acc.namespace.id)
                    self.event_sync_monitors[acc.id] = event_sync
                    event_sync.start()

                acc.sync_started()
                db_session.add(acc)
                db_session.commit()
                self.log.info('Sync started', account_id=account_id,
                              sync_host=fqdn)
            except Exception as e:
                self.log.error('sync_error', message=str(e.message),
                               account_id=account_id)
        else:
            self.log.info('sync already started', account_id=account_id)

def delete_marked_accounts(shard_id, throttle=False, dry_run=False):
    start = time.time()
    deleted_count = 0
    ids_to_delete = []

    with session_scope_by_shard_id(shard_id) as db_session:
        ids_to_delete = [(acc.id, acc.namespace.id)
                         for acc in db_session.query(Account)
                         if acc.is_deleted]

    queue_size = len(ids_to_delete)
    for account_id, namespace_id in ids_to_delete:
        # queue_size = length of queue
        # deleted_count = number of accounts deleted during loop iteration
        # this is necessary because the length of ids_to_delete doesn't
        # change during loop iteration
        statsd_client.gauge(
            'mailsync.{}.account_deletion.queue.length'.format(shard_id),
            queue_size - deleted_count)
        try:
            with session_scope(namespace_id) as db_session:
                account = db_session.query(Account).get(account_id)
                if not account:
                    log.critical('Account does not exist',
                                 account_id=account_id)
                    continue

                if account.sync_should_run or not account.is_deleted:
                    log.warn('Account NOT marked for deletion. '
                             'Will not delete', account_id=account_id)
                    continue

            log.info('Deleting account', account_id=account_id)
            start_time = time.time()
            # Delete data in database
            try:
                log.info('Deleting database data', account_id=account_id)
                delete_namespace(account_id, namespace_id, throttle=throttle,
                                 dry_run=dry_run)
            except Exception as e:
                log.critical('Database data deletion failed', error=e,
                             account_id=account_id)
                continue

            # Delete liveness data
            log.debug('Deleting liveness data', account_id=account_id)
            clear_heartbeat_status(account_id)

            deleted_count += 1
            statsd_client.incr('mailsync.account_deletion.queue.deleted', 1)
            statsd_client.timing('mailsync.account_deletion.queue.deleted',
                                 time.time() - start_time)
        except Exception:
            log_uncaught_errors(log, account_id=account_id)

    end = time.time()
    log.info('All data deleted successfully', shard_id=shard_id,
             time=end - start, count=deleted_count)

def _cleanup(self):
    self.folder_monitors.kill()
    clear_heartbeat_status(self.account_id)

def _cleanup(self):
    with session_scope() as mailsync_db_session:
        map(lambda x: x.set_stopped(mailsync_db_session),
            self.folder_monitors)
    self.folder_monitors.kill()
    clear_heartbeat_status(self.account_id)

def save_folder_names(log, account_id, folder_names, db_session):
    """
    Create Folder objects & map special folder names on Account objects.

    Folders that belong to an account and no longer exist in `folder_names`
    ARE DELETED, unless they are "dangling" (do not have a 'name' set).

    We don't canonicalize folder names to lowercase when saving because
    different backends may or may not be case-sensitive. Code that references
    saved folder names should canonicalize if needed when doing comparisons.

    """
    account = db_session.query(Account).get(account_id)
    assert 'inbox' in folder_names, 'Account {} has no detected inbox folder'\
        .format(account.email_address)

    all_folders = db_session.query(Folder).filter_by(
        account_id=account.id).all()
    # dangled_folders don't map to upstream account folders (may be used for
    # keeping track of e.g. special Gmail labels which are exposed as IMAP
    # flags but not folders)
    local_folders = {f.name: f for f in all_folders if f.name is not None}
    dangled_local_folders = {f.canonical_name: f for f in all_folders
                             if f.name is None}

    canonical_names = {'inbox', 'drafts', 'sent', 'spam', 'trash', 'starred',
                       'important', 'archive', 'all'}
    for canonical_name in canonical_names:
        if canonical_name in folder_names:
            backend_folder_name = folder_names[canonical_name]
            if backend_folder_name not in local_folders:
                # Reconcile dangled folders which now exist on the remote
                if canonical_name in dangled_local_folders:
                    folder = dangled_local_folders[canonical_name]
                    folder.name = folder_names[canonical_name]
                    del dangled_local_folders[canonical_name]
                else:
                    folder = Folder.find_or_create(
                        db_session, account, None, canonical_name)
                    if folder.name != folder_names[canonical_name]:
                        if folder.name is not None:
                            del local_folders[folder.name]
                        folder.name = folder_names[canonical_name]
                folder.get_associated_tag(db_session)
                attr_name = '{}_folder'.format(canonical_name)
                id_attr_name = '{}_folder_id'.format(canonical_name)
                if getattr(account, id_attr_name) != folder.id:
                    # NOTE: updating the relationship (i.e., attr_name) also
                    # updates the associated foreign key (i.e., id_attr_name)
                    setattr(account, attr_name, folder)
            else:
                del local_folders[backend_folder_name]

    # Gmail labels, user-created IMAP/EAS folders, etc.
    if 'extra' in folder_names:
        for name in folder_names['extra']:
            name = name[:MAX_FOLDER_NAME_LENGTH]
            if name not in local_folders:
                # This takes care of adding the folder to the session.
                folder = Folder.find_or_create(db_session, account, name)
                folder.get_associated_tag(db_session)
            else:
                del local_folders[name]

    # This may cascade to FolderItems and ImapUid (ONLY), which is what we
    # want--doing the update here short-circuits us syncing that change later.
    if len(local_folders):
        log.info("folders deleted from remote", folders=local_folders.keys())
        for name, folder in local_folders.iteritems():
            tag = folder.get_associated_tag(db_session,
                                            create_if_missing=False)
            if tag:
                if tag.name in tag.CANONICAL_TAG_NAMES:
                    log.warn("Canonical tag remotely deleted: {}"
                             .format(tag.name), account_id=account.id)
                db_session.delete(tag)
            db_session.delete(folder)
            clear_heartbeat_status(account_id, folder.id)

    db_session.commit()

def test_remove_account_from_index(store):
    for i in [2, 3]:
        proxy_for(1, i)
    n = clear_heartbeat_status(1)
    assert n == 2
    assert store.get_folder_list() == []

def delete_namespace(namespace_id, throttle=False, dry_run=False):
    """
    Delete all the data associated with a namespace from the database.
    USE WITH CAUTION.

    NOTE: This function is only called from bin/delete-account-data.
    It prints to stdout.

    Raises AccountDeletionErrror with message if there are problems
    """

    with session_scope(namespace_id) as db_session:
        try:
            account = db_session.query(Account).join(Namespace).filter(
                Namespace.id == namespace_id).one()
        except NoResultFound:
            raise AccountDeletionErrror(
                'Could not find account in database')

        if not account.is_marked_for_deletion:
            raise AccountDeletionErrror(
                'Account is_marked_for_deletion is False. '
                'Change this to proceed with deletion.')
        account_id = account.id
        account_discriminator = account.discriminator

    log.info('Deleting account', account_id=account_id)
    start_time = time.time()

    # These filters are used to configure batch deletion in chunks for
    # specific tables that are prone to transaction blocking during
    # large concurrent write volume. See _batch_delete
    # NOTE: ImapFolderInfo doesn't really fall into this category but
    # we include it here for simplicity anyway.
    filters = OrderedDict()
    for table in ['message', 'block', 'thread', 'transaction', 'actionlog',
                  'contact', 'event', 'dataprocessingcache']:
        filters[table] = ('namespace_id', namespace_id)

    if account_discriminator == 'easaccount':
        filters['easuid'] = ('easaccount_id', account_id)
        filters['easfoldersyncstatus'] = ('account_id', account_id)
    else:
        filters['imapuid'] = ('account_id', account_id)
        filters['imapfoldersyncstatus'] = ('account_id', account_id)
        filters['imapfolderinfo'] = ('account_id', account_id)

    from inbox.ignition import engine_manager
    # Bypass the ORM for performant bulk deletion;
    # we do /not/ want Transaction records created for these deletions,
    # so this is okay.
    engine = engine_manager.get_for_id(namespace_id)

    for cls in filters:
        _batch_delete(engine, cls, filters[cls], throttle=throttle,
                      dry_run=dry_run)

    # Use a single delete for the other tables. Rows from tables which contain
    # cascade-deleted foreign keys to other tables deleted here (or above)
    # are also not always explicitly deleted, except where needed for
    # performance.
    #
    # NOTE: Namespace, Account are deleted at the end too.

    query = 'DELETE FROM {} WHERE {}={};'

    filters = OrderedDict()
    for table in ('category', 'calendar'):
        filters[table] = ('namespace_id', namespace_id)
    for table in ('folder', 'label'):
        filters[table] = ('account_id', account_id)
    filters['namespace'] = ('id', namespace_id)

    for table, (column, id_) in filters.iteritems():
        log.info('Performing bulk deletion', table=table)
        start = time.time()

        if throttle and check_throttle():
            log.info("Throttling deletion")
            gevent.sleep(60)

        if not dry_run:
            engine.execute(query.format(table, column, id_))
        else:
            log.debug(query.format(table, column, id_))

        end = time.time()
        log.info('Completed bulk deletion', table=table, time=end - start)

    # Delete the account object manually to get rid of the various objects
    # associated with it (e.g: secrets, tokens, etc.)
    with session_scope(account_id) as db_session:
        account = db_session.query(Account).get(account_id)
        if dry_run is False:
            db_session.delete(account)
            db_session.commit()

    # Delete liveness data (heartbeats)
    log.debug('Deleting liveness data', account_id=account_id)
    clear_heartbeat_status(account_id)

    statsd_client.timing('mailsync.account_deletion.queue.deleted',
                         time.time() - start_time)

def start_sync(self, account_id):
    """
    Starts a sync for the account with the given account_id.
    If that account doesn't exist, does nothing.

    """
    with session_scope() as db_session:
        acc = db_session.query(Account).get(account_id)
        if acc is None:
            self.log.error('no such account', account_id=account_id)
            return
        fqdn = platform.node()
        self.log.info('starting sync', account_id=acc.id,
                      email_address=acc.email_address)

        if acc.sync_host is not None and acc.sync_host != fqdn:
            self.log.error('Sync Host Mismatch',
                           message='account is syncing on another host {}'
                                   .format(acc.sync_host),
                           account_id=account_id)

        elif acc.id not in self.monitors:
            # Before starting the sync, clear the heartbeat and individual
            # folder should_run bits. These bits will be flipped to the
            # correct state by the mailsync monitor.
            try:
                for status in acc.foldersyncstatuses:
                    status.sync_should_run = False
            except Exception as e:
                self.log.error('Error resetting folder run status',
                               message=str(e.message), account_id=acc.id)

            try:
                clear_heartbeat_status(acc.id)
            except Exception as e:
                self.log.error('Error clearing heartbeat on sync start',
                               message=str(e.message), account_id=acc.id)

            try:
                if acc.is_sync_locked and acc.is_killed:
                    acc.sync_unlock()
                acc.sync_lock()

                monitor = self.monitor_cls_for[acc.provider](acc)
                self.monitors[acc.id] = monitor
                monitor.start()

                info = acc.provider_info
                if info.get('contacts', None) and acc.sync_contacts:
                    contact_sync = ContactSync(acc.email_address,
                                               acc.provider, acc.id,
                                               acc.namespace.id)
                    self.contact_sync_monitors[acc.id] = contact_sync
                    contact_sync.start()

                if info.get('events', None) and acc.sync_events:
                    event_sync = EventSync(acc.email_address, acc.provider,
                                           acc.id, acc.namespace.id)
                    self.event_sync_monitors[acc.id] = event_sync
                    event_sync.start()

                acc.sync_started()
                db_session.add(acc)
                db_session.commit()
                self.log.info('Sync started', account_id=account_id,
                              sync_host=fqdn)
            except Exception as e:
                self.log.error('sync_error', message=str(e.message),
                               account_id=account_id)
        else:
            self.log.info('sync already started', account_id=account_id)

def test_remove_account_from_index(store):
    for i in [2, 3]:
        proxy_for(1, i).publish()
    n = clear_heartbeat_status(1)
    assert n == 2
    assert store.get_folder_list() == []