def _report_transactions_latency(self, latency):
    metric_names = [
        "inbox-contacts-search.transactions.latency",
    ]
    for metric in metric_names:
        statsd_client.timing(metric, latency)
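# Hedged sketch, not from the original source: these snippets all assume a
# module-level statsd_client. With the "statsd" PyPI package it could be
# constructed like this; host, port, and prefix are illustrative placeholders.
import statsd

statsd_client = statsd.StatsClient(host='localhost', port=8125,
                                   prefix='example-prefix')

# Usage mirrors the snippets above and below: timings are in milliseconds.
statsd_client.timing('inbox-contacts-search.transactions.latency', 12.5)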
def _save_to_s3(data_sha256, data):
    assert 'AWS_ACCESS_KEY_ID' in config, 'Need AWS key!'
    assert 'AWS_SECRET_ACCESS_KEY' in config, 'Need AWS secret!'
    assert 'MESSAGE_STORE_BUCKET_NAME' in config, \
        'Need bucket name to store message data!'

    start = time.time()

    # Boto pools connections at the class level
    conn = S3Connection(config.get('AWS_ACCESS_KEY_ID'),
                        config.get('AWS_SECRET_ACCESS_KEY'))
    bucket = conn.get_bucket(config.get('MESSAGE_STORE_BUCKET_NAME'),
                             validate=False)

    # See if it already exists; if so, don't recreate.
    key = bucket.get_key(data_sha256)
    if key:
        return

    key = Key(bucket)
    key.key = data_sha256
    key.set_contents_from_string(data)

    end = time.time()
    latency_millis = (end - start) * 1000
    statsd_client.timing('s3.save_latency', latency_millis)
def _save_to_s3_bucket(data_sha256, bucket_name, data):
    assert 'AWS_ACCESS_KEY_ID' in config, 'Need AWS key!'
    assert 'AWS_SECRET_ACCESS_KEY' in config, 'Need AWS secret!'

    start = time.time()

    # Boto pools connections at the class level
    conn = S3Connection(
        config.get('AWS_ACCESS_KEY_ID'),
        config.get('AWS_SECRET_ACCESS_KEY'),
        host=config.get('AWS_HOST', 's3.amazonaws.com'),
        port=config.get('AWS_PORT'),
        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
        is_secure=config.get('AWS_USE_SSL', True))
    bucket = conn.get_bucket(bucket_name, validate=False)

    # See if it already exists; if so, don't recreate.
    key = bucket.get_key(data_sha256)
    if key:
        return

    key = Key(bucket)
    key.key = data_sha256
    key.set_contents_from_string(data)

    end = time.time()
    latency_millis = (end - start) * 1000
    statsd_client.timing('s3_blockstore.save_latency', latency_millis)
def after_transaction_end(session, transaction):
    start_time = transaction_start_map.get(hash(transaction))
    if not start_time:
        return

    latency = int((time.time() - start_time) * 1000)
    statsd_client.timing(metric_name, latency)
    statsd_client.incr(metric_name)
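# Hedged sketch, not from the original source: after_transaction_end is a
# SQLAlchemy session event, so transaction_start_map is presumably populated
# by the matching after_transaction_create hook. One plausible wiring,
# assuming a module-level dict and an illustrative metric name:
import time

from sqlalchemy import event
from sqlalchemy.orm import Session

transaction_start_map = {}
metric_name = 'db.transactions.latency'  # illustrative placeholder


@event.listens_for(Session, 'after_transaction_create')
def after_transaction_create(session, transaction):
    # Key the start time the same way after_transaction_end reads it back.
    transaction_start_map[hash(transaction)] = time.time()


event.listen(Session, 'after_transaction_end', after_transaction_end)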
def _report_message_velocity(self, timedelta, num_uids):
    latency = timedelta.total_seconds() * 1000
    latency_per_uid = float(latency) / num_uids
    metrics = [
        ".".join(["providers", self.provider_name, "message_velocity"]),
        ".".join(["providers", "overall", "message_velocity"]),
    ]
    for metric in metrics:
        statsd_client.timing(metric, latency_per_uid)
def _report_message_velocity(self, timedelta, num_uids):
    latency = timedelta.total_seconds() * 1000
    latency_per_uid = float(latency) / num_uids
    metrics = [
        '.'.join(['providers', self.provider_name, 'message_velocity']),
        '.'.join(['providers', 'overall', 'message_velocity'])
    ]
    for metric in metrics:
        statsd_client.timing(metric, latency_per_uid)
def delete_marked_accounts(shard_id, throttle=False, dry_run=False):
    start = time.time()
    deleted_count = 0
    ids_to_delete = []

    with session_scope_by_shard_id(shard_id) as db_session:
        ids_to_delete = [(acc.id, acc.namespace.id)
                         for acc in db_session.query(Account)
                         if acc.is_deleted]

    queue_size = len(ids_to_delete)
    for account_id, namespace_id in ids_to_delete:
        # queue_size = number of accounts queued at the start of the run;
        # deleted_count = number of accounts deleted so far. Tracking the
        # difference is necessary because len(ids_to_delete) doesn't change
        # during iteration.
        statsd_client.gauge('mailsync.{}.account_deletion.queue.length'
                            .format(shard_id),
                            queue_size - deleted_count)
        try:
            with session_scope(namespace_id) as db_session:
                account = db_session.query(Account).get(account_id)
                if not account:
                    log.critical('Account does not exist',
                                 account_id=account_id)
                    continue

                if account.sync_should_run or not account.is_deleted:
                    log.warn('Account NOT marked for deletion. '
                             'Will not delete', account_id=account_id)
                    continue

            log.info('Deleting account', account_id=account_id)
            start_time = time.time()
            # Delete data in database
            try:
                log.info('Deleting database data', account_id=account_id)
                delete_namespace(account_id, namespace_id,
                                 throttle=throttle, dry_run=dry_run)
            except Exception as e:
                log.critical('Database data deletion failed', error=e,
                             account_id=account_id)
                continue

            # Delete liveness data
            log.debug('Deleting liveness data', account_id=account_id)
            clear_heartbeat_status(account_id)

            deleted_count += 1
            statsd_client.incr('mailsync.account_deletion.queue.deleted', 1)
            # NOTE: this reports seconds, while the other timing calls in
            # this section report milliseconds.
            statsd_client.timing('mailsync.account_deletion.queue.deleted',
                                 time.time() - start_time)
        except Exception:
            log_uncaught_errors(log, account_id=account_id)

    end = time.time()
    log.info('All data deleted successfully', shard_id=shard_id,
             time=end - start, count=deleted_count)
def _log_to_statsd(self, action_log_status, latency=None):
    metric_names = [
        "syncback.overall.{}".format(action_log_status),
        "syncback.providers.{}.{}".format(self.provider, action_log_status)
    ]

    for metric in metric_names:
        statsd_client.incr(metric)
        if latency:
            statsd_client.timing(metric, latency * 1000)
def delete_marked_accounts(shard_id, throttle=False, dry_run=False):
    start = time.time()
    deleted_count = 0
    ids_to_delete = []

    with session_scope_by_shard_id(shard_id) as db_session:
        ids_to_delete = [(acc.id, acc.namespace.id)
                         for acc in db_session.query(Account)
                         if acc.is_deleted]

    for account_id, namespace_id in ids_to_delete:
        try:
            with session_scope(namespace_id) as db_session:
                account = db_session.query(Account).get(account_id)
                if not account:
                    log.critical('Account does not exist',
                                 account_id=account_id)
                    continue

                if account.sync_should_run or not account.is_deleted:
                    log.warn('Account NOT marked for deletion. '
                             'Will not delete', account_id=account_id)
                    continue

            log.info('Deleting account', account_id=account_id)
            start_time = time.time()
            # Delete data in database
            try:
                log.info('Deleting database data', account_id=account_id)
                delete_namespace(account_id, namespace_id,
                                 throttle=throttle, dry_run=dry_run)
            except Exception as e:
                log.critical('Database data deletion failed', error=e,
                             account_id=account_id)
                continue

            # Delete liveness data
            log.debug('Deleting liveness data', account_id=account_id)
            clear_heartbeat_status(account_id)

            deleted_count += 1
            statsd_client.timing('mailsync.account_deletion.queue.deleted',
                                 time.time() - start_time)
        except Exception:
            log_uncaught_errors(log, account_id=account_id)

    end = time.time()
    log.info('All data deleted successfully', shard_id=shard_id,
             time=end - start, count=deleted_count)
def create_message(self, db_session, acct, folder, msg):
    assert acct is not None and acct.namespace is not None

    # Check if we somehow already saved the imapuid (shouldn't happen, but
    # possible due to race condition). If so, don't commit changes.
    existing_imapuid = (db_session.query(ImapUid).filter(
        ImapUid.account_id == acct.id,
        ImapUid.folder_id == folder.id,
        ImapUid.msg_uid == msg.uid,
    ).first())
    if existing_imapuid is not None:
        log.error(
            "Expected to create imapuid, but existing row found",
            remote_msg_uid=msg.uid,
            existing_imapuid=existing_imapuid.id,
        )
        return None

    # Check if the message is valid.
    # https://sentry.nylas.com/sentry/sync-prod/group/3387/
    if msg.body is None:
        log.warning("Server returned a message with an empty body.")
        return None

    new_uid = common.create_imap_message(db_session, acct, folder, msg)
    self.add_message_to_thread(db_session, new_uid.message, msg)

    db_session.flush()

    # We're calling import_attached_events here instead of some more
    # obvious place (like Message.create_from_synced) because
    # import_attached_events does db lookups, which require
    # new_uid.message to have been flushed.
    if new_uid.message.has_attached_events:
        with db_session.no_autoflush:
            import_attached_events(db_session, acct, new_uid.message)

    # If we're in the polling state, report the latency between when the
    # message was received and when it was created.
    if self.state == "poll":
        latency_millis = (datetime.utcnow() -
                          new_uid.message.received_date
                          ).total_seconds() * 1000
        metrics = [
            ".".join(
                ["mailsync", "providers", "overall", "message_latency"]),
            ".".join([
                "mailsync", "providers", self.provider_name,
                "message_latency"
            ]),
        ]
        for metric in metrics:
            statsd_client.timing(metric, latency_millis)

    return new_uid
def _report_message_velocity(self, timedelta, num_uids):
    latency = timedelta.total_seconds() * 1000
    latency_per_uid = float(latency) / num_uids
    metrics = [
        ".".join([
            "mailsync", "providers", self.provider_name, "message_velocity"
        ]),
        ".".join(["mailsync", "providers", "overall", "message_velocity"]),
    ]
    for metric in metrics:
        statsd_client.timing(metric, latency_per_uid)
def end(session):
    start_time = transaction_start_map.get(session)
    if not start_time:
        return

    del transaction_start_map[session]

    t = time.time()
    latency = int((t - start_time) * 1000)
    statsd_client.timing(metric_name, latency)
    statsd_client.incr(metric_name)
    if latency > MAX_SANE_TRX_TIME_MS:
        log.warning('Long transaction', latency=latency,
                    modname=modname, funcname=funcname)
def _report_first_message(self):
    now = datetime.utcnow()

    with session_scope(self.namespace_id) as db_session:
        account = db_session.query(Account).get(self.account_id)
        account_created = account.created_at

    latency = (now - account_created).total_seconds() * 1000
    metrics = [
        '.'.join(['providers', self.provider_name, 'first_message']),
        '.'.join(['providers', 'overall', 'first_message'])
    ]

    for metric in metrics:
        statsd_client.timing(metric, latency)
def delete_marked_accounts(shard_id, throttle=False, dry_run=False):
    start = time.time()
    deleted_count = 0
    ids_to_delete = []

    with session_scope_by_shard_id(shard_id) as db_session:
        ids_to_delete = [(acc.id, acc.namespace.id)
                         for acc in db_session.query(Account)
                         if acc.is_deleted]

    for account_id, namespace_id in ids_to_delete:
        try:
            with session_scope(namespace_id) as db_session:
                account = db_session.query(Account).get(account_id)
                if not account:
                    log.critical('Account does not exist',
                                 account_id=account_id)
                    continue

                if account.sync_should_run or not account.is_deleted:
                    log.warn('Account NOT marked for deletion. '
                             'Will not delete', account_id=account_id)
                    continue

            log.info('Deleting account', account_id=account_id)
            start_time = time.time()
            # Delete data in database
            try:
                log.info('Deleting database data', account_id=account_id)
                delete_namespace(account_id, namespace_id,
                                 throttle=throttle, dry_run=dry_run)
            except Exception as e:
                log.critical('Database data deletion failed', error=e,
                             account_id=account_id)
                continue

            # Delete liveness data
            log.debug('Deleting liveness data', account_id=account_id)
            clear_heartbeat_status(account_id)

            deleted_count += 1
            statsd_client.timing('mailsync.account_deletion.queue.deleted',
                                 time.time() - start_time)
            gevent.sleep(60)
        except Exception:
            log_uncaught_errors(log, account_id=account_id)

    end = time.time()
    log.info('All data deleted successfully', shard_id=shard_id,
             time=end - start, count=deleted_count)
def create_message(self, db_session, acct, folder, msg):
    assert acct is not None and acct.namespace is not None

    # Check if we somehow already saved the imapuid (shouldn't happen, but
    # possible due to race condition). If so, don't commit changes.
    existing_imapuid = db_session.query(ImapUid).filter(
        ImapUid.account_id == acct.id, ImapUid.folder_id == folder.id,
        ImapUid.msg_uid == msg.uid).first()
    if existing_imapuid is not None:
        log.error('Expected to create imapuid, but existing row found',
                  remote_msg_uid=msg.uid,
                  existing_imapuid=existing_imapuid.id)
        return None

    # Check if the message is valid.
    # https://sentry.nylas.com/sentry/sync-prod/group/3387/
    if msg.body is None:
        log.warning('Server returned a message with an empty body.')
        return None

    new_uid = common.create_imap_message(db_session, acct, folder, msg)
    self.add_message_to_thread(db_session, new_uid.message, msg)

    db_session.flush()

    # We're calling import_attached_events here instead of some more
    # obvious place (like Message.create_from_synced) because
    # import_attached_events does db lookups, which require
    # new_uid.message to have been flushed.
    if new_uid.message.has_attached_events:
        with db_session.no_autoflush:
            import_attached_events(db_session, acct, new_uid.message)

    # If we're in the polling state, report the latency between when the
    # message was received and when it was created.
    if self.state == 'poll':
        latency_millis = (
            datetime.utcnow() - new_uid.message.received_date) \
            .total_seconds() * 1000
        metrics = [
            '.'.join(['accounts', 'overall', 'message_latency']),
            '.'.join(['providers', self.provider_name, 'message_latency']),
        ]
        for metric in metrics:
            statsd_client.timing(metric, latency_millis)

    return new_uid
def _delete_from_s3_bucket(data_sha256_hashes, bucket_name):
    data_sha256_hashes = [hash_ for hash_ in data_sha256_hashes if hash_]
    if not data_sha256_hashes:
        return None

    assert "AWS_ACCESS_KEY_ID" in config, "Need AWS key!"
    assert "AWS_SECRET_ACCESS_KEY" in config, "Need AWS secret!"

    start = time.time()

    # Boto pools connections at the class level
    bucket = get_s3_bucket(bucket_name)

    bucket.delete_keys(data_sha256_hashes, quiet=True)

    end = time.time()
    latency_millis = (end - start) * 1000
    statsd_client.timing("s3_blockstore.delete_latency", latency_millis)
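# Hedged sketch, not from the original source: the variants above and below
# call get_s3_bucket() without showing it. A minimal helper consistent with
# the inline-connection variants might look like this; the AWS_* config keys
# come from those variants, and any caching behavior is an assumption.
def get_s3_bucket(bucket_name):
    conn = S3Connection(config.get("AWS_ACCESS_KEY_ID"),
                        config.get("AWS_SECRET_ACCESS_KEY"))
    # validate=False skips a round trip to S3; boto pools connections at the
    # class level, so constructing S3Connection repeatedly is cheap.
    return conn.get_bucket(bucket_name, validate=False)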
def create_message(self, db_session, acct, folder, msg):
    assert acct is not None and acct.namespace is not None

    # Check if we somehow already saved the imapuid (shouldn't happen, but
    # possible due to race condition). If so, don't commit changes.
    existing_imapuid = (
        db_session.query(ImapUid)
        .filter(ImapUid.account_id == acct.id,
                ImapUid.folder_id == folder.id,
                ImapUid.msg_uid == msg.uid)
        .first()
    )
    if existing_imapuid is not None:
        log.error(
            "Expected to create imapuid, but existing row found",
            remote_msg_uid=msg.uid,
            existing_imapuid=existing_imapuid.id,
        )
        return None

    new_uid = common.create_imap_message(db_session, acct, folder, msg)
    self.add_message_to_thread(db_session, new_uid.message, msg)

    db_session.flush()

    # We're calling import_attached_events here instead of some more
    # obvious place (like Message.create_from_synced) because
    # import_attached_events does db lookups, which require
    # new_uid.message to have been flushed.
    if new_uid.message.has_attached_events:
        with db_session.no_autoflush:
            import_attached_events(db_session, acct, new_uid.message)

    # If we're in the polling state, report the latency between when the
    # message was received and when it was created.
    if self.state == "poll":
        latency_millis = (datetime.utcnow() -
                          new_uid.message.received_date
                          ).total_seconds() * 1000
        metrics = [
            ".".join(["accounts", "overall", "message_latency"]),
            ".".join(["accounts", str(acct.id), "message_latency"]),
            ".".join(["providers", self.provider_name, "message_latency"]),
        ]
        for metric in metrics:
            statsd_client.timing(metric, latency_millis)

    return new_uid
def _delete_from_s3_bucket(data_sha256_hashes, bucket_name):
    # list() so the emptiness check below also behaves on Python 3, where
    # filter() returns a lazy iterator.
    data_sha256_hashes = list(filter(None, data_sha256_hashes))
    if not data_sha256_hashes:
        return None

    assert "AWS_ACCESS_KEY_ID" in config, "Need AWS key!"
    assert "AWS_SECRET_ACCESS_KEY" in config, "Need AWS secret!"

    start = time.time()

    # Boto pools connections at the class level
    conn = S3Connection(config.get("AWS_ACCESS_KEY_ID"),
                        config.get("AWS_SECRET_ACCESS_KEY"))
    bucket = conn.get_bucket(bucket_name, validate=False)

    bucket.delete_keys(data_sha256_hashes, quiet=True)

    end = time.time()
    latency_millis = (end - start) * 1000
    statsd_client.timing("s3_blockstore.delete_latency", latency_millis)
def end(session):
    start_time = transaction_start_map.get(session)
    if not start_time:
        return

    del transaction_start_map[session]

    t = time.time()
    latency = int((t - start_time) * 1000)

    if config.get("ENABLE_DB_TXN_METRICS", False):
        statsd_client.timing(metric_name, latency)
        statsd_client.incr(metric_name)

    if latency > MAX_SANE_TRX_TIME_MS:
        log.warning(
            "Long transaction",
            latency=latency,
            modname=modname,
            funcname=funcname,
        )
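# Hedged sketch, not from the original source: this end() variant is keyed by
# session rather than by hash(transaction), which suggests a matching hook
# that records the start time when the session begins a transaction. One
# plausible wiring via SQLAlchemy's after_begin session event (the original
# registration mechanism is not shown, so this is an assumption):
from sqlalchemy import event
from sqlalchemy.orm import Session


@event.listens_for(Session, 'after_begin')
def _record_transaction_start(session, transaction, connection):
    # Key by session, matching how end() looks the start time back up.
    transaction_start_map[session] = time.time()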
def create_message(self, db_session, acct, folder, msg):
    assert acct is not None and acct.namespace is not None

    # Check if we somehow already saved the imapuid (shouldn't happen, but
    # possible due to race condition). If so, don't commit changes.
    existing_imapuid = db_session.query(ImapUid).filter(
        ImapUid.account_id == acct.id, ImapUid.folder_id == folder.id,
        ImapUid.msg_uid == msg.uid).first()
    if existing_imapuid is not None:
        log.error('Expected to create imapuid, but existing row found',
                  remote_msg_uid=msg.uid,
                  existing_imapuid=existing_imapuid.id)
        return None

    new_uid = common.create_imap_message(db_session, acct, folder, msg)
    self.add_message_to_thread(db_session, new_uid.message, msg)

    db_session.flush()

    # We're calling import_attached_events here instead of some more
    # obvious place (like Message.create_from_synced) because
    # import_attached_events does db lookups, which require
    # new_uid.message to have been flushed.
    if new_uid.message.has_attached_events:
        with db_session.no_autoflush:
            import_attached_events(db_session, acct, new_uid.message)

    # If we're in the polling state, report the latency between when the
    # message was received and when it was created.
    if self.state == 'poll':
        latency_millis = (
            datetime.utcnow() - new_uid.message.received_date) \
            .total_seconds() * 1000
        metrics = [
            '.'.join(['accounts', 'overall', 'message_latency']),
            '.'.join(['accounts', str(acct.id), 'message_latency']),
            '.'.join(['providers', self.provider_name, 'message_latency']),
        ]
        for metric in metrics:
            statsd_client.timing(metric, latency_millis)

    return new_uid
def _report_first_message(self):
    # Only record the "time to first message" in the inbox. Because users
    # can add more folders at any time, "initial sync"-style metrics for
    # other folders don't mean much.
    if self.folder_role not in ['inbox', 'all']:
        return

    now = datetime.utcnow()

    with session_scope(self.namespace_id) as db_session:
        account = db_session.query(Account).get(self.account_id)
        account_created = account.created_at

    latency = (now - account_created).total_seconds() * 1000
    metrics = [
        '.'.join(['mailsync', 'providers', self.provider_name,
                  'first_message']),
        '.'.join(['mailsync', 'providers', 'overall', 'first_message'])
    ]

    for metric in metrics:
        statsd_client.timing(metric, latency)
def _save_to_s3_bucket(data_sha256, bucket_name, data):
    # type: (str, str, bytes) -> None
    assert "AWS_ACCESS_KEY_ID" in config, "Need AWS key!"
    assert "AWS_SECRET_ACCESS_KEY" in config, "Need AWS secret!"

    start = time.time()

    # Boto pools connections at the class level
    bucket = get_s3_bucket(bucket_name)

    # See if it already exists; if so, don't recreate.
    key = bucket.get_key(data_sha256)
    if key:
        return

    key = Key(bucket)
    key.key = data_sha256
    key.set_contents_from_string(data)

    end = time.time()
    latency_millis = (end - start) * 1000
    statsd_client.timing("s3_blockstore.save_latency", latency_millis)
def _save_to_s3_bucket(data_sha256, bucket_name, data):
    assert "AWS_ACCESS_KEY_ID" in config, "Need AWS key!"
    assert "AWS_SECRET_ACCESS_KEY" in config, "Need AWS secret!"

    start = time.time()

    # Boto pools connections at the class level
    conn = S3Connection(config.get("AWS_ACCESS_KEY_ID"),
                        config.get("AWS_SECRET_ACCESS_KEY"))
    bucket = conn.get_bucket(bucket_name, validate=False)

    # See if it already exists; if so, don't recreate.
    key = bucket.get_key(data_sha256)
    if key:
        return

    key = Key(bucket)
    key.key = data_sha256
    key.set_contents_from_string(data)

    end = time.time()
    latency_millis = (end - start) * 1000
    statsd_client.timing("s3_blockstore.save_latency", latency_millis)
def _report_first_message(self):
    # Only record the "time to first message" in the inbox. Because users
    # can add more folders at any time, "initial sync"-style metrics for
    # other folders don't mean much.
    if self.folder_role not in ['inbox', 'all']:
        return

    now = datetime.utcnow()

    with session_scope(self.namespace_id) as db_session:
        account = db_session.query(Account).get(self.account_id)
        account_created = account.created_at

    latency = (now - account_created).total_seconds() * 1000
    metrics = [
        '.'.join([
            'mailsync', 'providers', self.provider_name, 'first_message'
        ]),
        '.'.join(['mailsync', 'providers', 'overall', 'first_message'])
    ]

    for metric in metrics:
        statsd_client.timing(metric, latency)
def delete_marked_accounts(shard_id, ids_to_delete, throttle=False,
                           dry_run=False):
    start = time.time()
    deleted_count = 0
    for account_id, namespace_id in ids_to_delete:
        try:
            with session_scope(namespace_id) as db_session:
                account = db_session.query(Account).get(account_id)
                if not account:
                    log.critical("Account does not exist",
                                 account_id=account_id)
                    continue

                if account.sync_should_run or not account.is_deleted:
                    log.warn("Account NOT marked for deletion. "
                             "Will not delete", account_id=account_id)
                    continue

            log.info("Deleting account", account_id=account_id)
            start_time = time.time()
            # Delete data in database
            try:
                log.info("Deleting database data", account_id=account_id)
                delete_namespace(account_id, namespace_id,
                                 throttle=throttle, dry_run=dry_run)
            except Exception as e:
                log.critical("Database data deletion failed", error=e,
                             account_id=account_id)
                continue

            # Delete liveness data
            log.debug("Deleting liveness data", account_id=account_id)
            clear_heartbeat_status(account_id)

            deleted_count += 1
            statsd_client.timing("mailsync.account_deletion.queue.deleted",
                                 time.time() - start_time)
        except Exception:
            log_uncaught_errors(log, account_id=account_id)

    end = time.time()
    log.info("All data deleted successfully", shard_id=shard_id,
             time=end - start, count=deleted_count)
def delete_namespace(namespace_id, throttle=False, dry_run=False):
    """
    Delete all the data associated with a namespace from the database.
    USE WITH CAUTION.

    NOTE: This function is only called from bin/delete-account-data.
    It prints to stdout.

    Raises AccountDeletionErrror with a message if there are problems.
    """

    with session_scope(namespace_id) as db_session:
        try:
            account = (db_session.query(Account).join(Namespace).filter(
                Namespace.id == namespace_id).one())
        except NoResultFound:
            raise AccountDeletionErrror("Could not find account in database")

        if not account.is_marked_for_deletion:
            raise AccountDeletionErrror(
                "Account is_marked_for_deletion is False. "
                "Change this to proceed with deletion.")
        account_id = account.id
        account_discriminator = account.discriminator

    log.info("Deleting account", account_id=account_id)
    start_time = time.time()

    # These filters are used to configure batch deletion in chunks for
    # specific tables that are prone to transaction blocking during
    # large concurrent write volume. See _batch_delete.
    # NOTE: ImapFolderInfo doesn't really fall into this category but
    # we include it here for simplicity anyway.
    filters = OrderedDict()
    for table in [
        "message",
        "block",
        "thread",
        "transaction",
        "actionlog",
        "event",
        "contact",
        "dataprocessingcache",
    ]:
        filters[table] = ("namespace_id", namespace_id)

    if account_discriminator == "easaccount":
        filters["easuid"] = ("easaccount_id", account_id)
        filters["easfoldersyncstatus"] = ("account_id", account_id)
    else:
        filters["imapuid"] = ("account_id", account_id)
        filters["imapfoldersyncstatus"] = ("account_id", account_id)
        filters["imapfolderinfo"] = ("account_id", account_id)

    from inbox.ignition import engine_manager
    # Bypass the ORM for performant bulk deletion;
    # we do /not/ want Transaction records created for these deletions,
    # so this is okay.
    engine = engine_manager.get_for_id(namespace_id)

    for cls in filters:
        _batch_delete(engine, cls, filters[cls], account_id,
                      throttle=throttle, dry_run=dry_run)

    # Use a single delete for the other tables. Rows from tables which contain
    # cascade-deleted foreign keys to other tables deleted here (or above)
    # are also not always explicitly deleted, except where needed for
    # performance.
    #
    # NOTE: Namespace, Account are deleted at the end too.

    query = "DELETE FROM {} WHERE {}={};"

    filters = OrderedDict()
    for table in ("category", "calendar"):
        filters[table] = ("namespace_id", namespace_id)
    for table in ("folder", "label"):
        filters[table] = ("account_id", account_id)
    filters["namespace"] = ("id", namespace_id)

    for table, (column, id_) in iteritems(filters):
        log.info("Performing bulk deletion", table=table)
        start = time.time()

        if throttle:
            bulk_throttle()

        if not dry_run:
            engine.execute(query.format(table, column, id_))
        else:
            log.debug(query.format(table, column, id_))

        end = time.time()
        log.info("Completed bulk deletion", table=table, time=end - start)

    # Delete the account object manually to get rid of the various objects
    # associated with it (e.g.: secrets, tokens, etc.)
    with session_scope(account_id) as db_session:
        account = db_session.query(Account).get(account_id)
        if dry_run is False:
            db_session.delete(account)
            db_session.commit()

    # Delete liveness data (heartbeats)
    log.debug("Deleting liveness data", account_id=account_id)
    clear_heartbeat_status(account_id)

    statsd_client.timing("mailsync.account_deletion.queue.deleted",
                         time.time() - start_time)
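# Illustrative usage with a hypothetical namespace id, per the docstring's
# warning: a dry run logs the bulk-deletion SQL without executing it.
delete_namespace(42, throttle=True, dry_run=True)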
def delete_namespace(namespace_id, throttle=False, dry_run=False):
    """
    Delete all the data associated with a namespace from the database.
    USE WITH CAUTION.

    NOTE: This function is only called from bin/delete-account-data.
    It prints to stdout.

    Raises AccountDeletionErrror with a message if there are problems.
    """

    with session_scope(namespace_id) as db_session:
        try:
            account = db_session.query(Account).join(Namespace).filter(
                Namespace.id == namespace_id).one()
        except NoResultFound:
            raise AccountDeletionErrror(
                'Could not find account in database')

        if not account.is_marked_for_deletion:
            raise AccountDeletionErrror(
                'Account is_marked_for_deletion is False. '
                'Change this to proceed with deletion.')
        account_id = account.id
        account_discriminator = account.discriminator

    log.info('Deleting account', account_id=account_id)
    start_time = time.time()

    # These filters are used to configure batch deletion in chunks for
    # specific tables that are prone to transaction blocking during
    # large concurrent write volume. See _batch_delete.
    # NOTE: ImapFolderInfo doesn't really fall into this category but
    # we include it here for simplicity anyway.
    filters = OrderedDict()
    for table in ['message', 'block', 'thread', 'transaction', 'actionlog',
                  'contact', 'event', 'dataprocessingcache']:
        filters[table] = ('namespace_id', namespace_id)

    if account_discriminator == 'easaccount':
        filters['easuid'] = ('easaccount_id', account_id)
        filters['easfoldersyncstatus'] = ('account_id', account_id)
    else:
        filters['imapuid'] = ('account_id', account_id)
        filters['imapfoldersyncstatus'] = ('account_id', account_id)
        filters['imapfolderinfo'] = ('account_id', account_id)

    from inbox.ignition import engine_manager
    # Bypass the ORM for performant bulk deletion;
    # we do /not/ want Transaction records created for these deletions,
    # so this is okay.
    engine = engine_manager.get_for_id(namespace_id)

    for cls in filters:
        _batch_delete(engine, cls, filters[cls], throttle=throttle,
                      dry_run=dry_run)

    # Use a single delete for the other tables. Rows from tables which contain
    # cascade-deleted foreign keys to other tables deleted here (or above)
    # are also not always explicitly deleted, except where needed for
    # performance.
    #
    # NOTE: Namespace, Account are deleted at the end too.

    query = 'DELETE FROM {} WHERE {}={};'

    filters = OrderedDict()
    for table in ('category', 'calendar'):
        filters[table] = ('namespace_id', namespace_id)
    for table in ('folder', 'label'):
        filters[table] = ('account_id', account_id)
    filters['namespace'] = ('id', namespace_id)

    for table, (column, id_) in filters.iteritems():
        log.info('Performing bulk deletion', table=table)
        start = time.time()

        if throttle and check_throttle():
            log.info("Throttling deletion")
            gevent.sleep(60)

        if not dry_run:
            engine.execute(query.format(table, column, id_))
        else:
            log.debug(query.format(table, column, id_))

        end = time.time()
        log.info('Completed bulk deletion', table=table, time=end - start)

    # Delete the account object manually to get rid of the various objects
    # associated with it (e.g.: secrets, tokens, etc.)
    with session_scope(account_id) as db_session:
        account = db_session.query(Account).get(account_id)
        if dry_run is False:
            db_session.delete(account)
            db_session.commit()

    # Delete liveness data (heartbeats)
    log.debug('Deleting liveness data', account_id=account_id)
    clear_heartbeat_status(account_id)

    statsd_client.timing('mailsync.account_deletion.queue.deleted',
                         time.time() - start_time)
def delete_marked_accounts(shard_id, throttle=False, dry_run=False):
    start = time.time()
    deleted_count = 0
    ids_to_delete = []

    with session_scope_by_shard_id(shard_id) as db_session:
        ids_to_delete = [(acc.id, acc.namespace.id)
                         for acc in db_session.query(Account)
                         if acc.is_deleted]

    queue_size = len(ids_to_delete)
    for account_id, namespace_id in ids_to_delete:
        # queue_size = number of accounts queued at the start of the run;
        # deleted_count = number of accounts deleted so far. Tracking the
        # difference is necessary because len(ids_to_delete) doesn't change
        # during iteration.
        statsd_client.gauge(
            'mailsync.{}.account_deletion.queue.length'.format(shard_id),
            queue_size - deleted_count)
        try:
            with session_scope(namespace_id) as db_session:
                account = db_session.query(Account).get(account_id)
                if not account:
                    log.critical('Account does not exist',
                                 account_id=account_id)
                    continue

                if account.sync_should_run or not account.is_deleted:
                    log.warn(
                        'Account NOT marked for deletion. '
                        'Will not delete', account_id=account_id)
                    continue

            log.info('Deleting account', account_id=account_id)
            start_time = time.time()
            # Delete data in database
            try:
                log.info('Deleting database data', account_id=account_id)
                delete_namespace(account_id, namespace_id,
                                 throttle=throttle, dry_run=dry_run)
            except Exception as e:
                log.critical('Database data deletion failed', error=e,
                             account_id=account_id)
                continue

            # Delete liveness data
            log.debug('Deleting liveness data', account_id=account_id)
            clear_heartbeat_status(account_id)

            deleted_count += 1
            statsd_client.incr('mailsync.account_deletion.queue.deleted', 1)
            statsd_client.timing('mailsync.account_deletion.queue.deleted',
                                 time.time() - start_time)
        except Exception:
            log_uncaught_errors(log, account_id=account_id)

    end = time.time()
    log.info('All data deleted successfully', shard_id=shard_id,
             time=end - start, count=deleted_count)