def main(host, port):
    """Flush both heartbeat Redis databases, then exit.

    When an explicit host is supplied, connect straight to it; otherwise
    use the default configured clients.
    """
    maybe_enable_rollbar()

    if host:
        status_client = _get_redis_client(host, port, STATUS_DATABASE)
        report_client = _get_redis_client(host, port, REPORT_DATABASE)
    else:
        status_client = get_redis_client(STATUS_DATABASE)
        report_client = get_redis_client(REPORT_DATABASE)

    # Destructive: wipe both databases wholesale.
    for client in (status_client, report_client):
        client.flushdb()
    exit(0)
def publish(self, **kwargs):
    """Write this proxy's heartbeat to the status database.

    Only the whitelisted fields may be published; anything else trips an
    assertion, which (like any other failure here) is logged and swallowed
    so a heartbeat problem never takes down the sync process.
    """
    allowed = {'provider_name', 'folder_name', 'heartbeat_at', 'state',
               'action'}
    try:
        client = get_redis_client(STATUS_DATABASE)
        # Reject unknown fields before touching self.value.
        for field in kwargs:
            assert field in allowed
        now = datetime.utcnow()
        self.value['heartbeat_at'] = str(now)
        self.value.update(kwargs or {})
        client.hset(self.key, self.device_id, json.dumps(self.value))
        self.heartbeat_at = now
        # 'action' is one-shot: do not let it linger into the next publish.
        if 'action' in self.value:
            del self.value['action']
    except Exception:
        log = get_logger()
        log.error('Error while writing the heartbeat status',
                  account_id=self.key.account_id,
                  folder_id=self.key.folder_id,
                  device_id=self.device_id,
                  exc_info=True)
def upgrade():
    """Migration: reset inbox-folder IMAP sync state for Gmail accounts.

    Remove UIDs and sync status for inbox IMAP syncs -- otherwise
    archives/deletes may not be synced correctly. Also clears the matching
    heartbeat entry in Redis so stale status is not reported.
    """
    # Imports are local because this runs inside an Alembic migration.
    from inbox.models.backends.imap import ImapFolderSyncStatus, ImapUid
    from inbox.models.backends.gmail import GmailAccount
    from inbox.models.session import session_scope
    from inbox.heartbeat.config import STATUS_DATABASE, get_redis_client
    from inbox.heartbeat.status import HeartbeatStatusKey

    redis_client = get_redis_client(STATUS_DATABASE)
    with session_scope(versioned=False) as db_session:
        for account in db_session.query(GmailAccount):
            if account.inbox_folder is None:
                # May be the case for accounts that we can't sync, e.g. due to
                # All Mail being disabled in IMAP.
                continue
            # Drop the folder-level sync status row for the inbox folder.
            q = db_session.query(ImapFolderSyncStatus).filter(
                ImapFolderSyncStatus.account_id == account.id,
                ImapFolderSyncStatus.folder_id == account.inbox_folder.id)
            q.delete()
            # Drop all cached UIDs for the inbox folder.
            q = db_session.query(ImapUid).filter(
                ImapUid.account_id == account.id,
                ImapUid.folder_id == account.inbox_folder.id)
            q.delete()
            # Commit per account so a failure mid-way loses minimal work.
            db_session.commit()
            # Also remove the corresponding status entry from Redis.
            key = HeartbeatStatusKey(account.id, account.inbox_folder.id)
            redis_client.delete(key)
def get_accounts_folders(self, account_ids):
    """Fetch the folder heartbeat index for many accounts at once.

    Preferred method of querying for multiple accounts: a single pipeline
    batches every ZRANGE into one round trip to redis.
    """
    client = heartbeat_config.get_redis_client(self.host, self.port)
    pipe = client.pipeline()
    for account_id in account_ids:
        pipe.zrange(account_id, 0, -1, withscores=True)
    return pipe.execute()
def store_heartbeat_report(host, port, report):
    """Replace the stored heartbeat report with *report*.

    No-op for an empty report. The report database is flushed first so no
    stale entries survive the write.
    """
    if not report:
        return
    client = get_redis_client(host, port, REPORT_DATABASE)
    pipe = client.pipeline()
    # flush the db to avoid stale information
    pipe.flushdb()
    for report_key, entry in report.iteritems():
        pipe.set(report_key, entry)
    pipe.execute()
def fetch_heartbeat_report(host, port):
    """Load the stored heartbeat report as {account_id: ReportEntry}."""
    client = get_redis_client(host, port, REPORT_DATABASE)
    pipe = client.pipeline()
    account_ids = []
    for raw_name in client.scan_iter(count=100):
        # Skip ElastiCache's internal replication bookkeeping key.
        if raw_name == 'ElastiCacheMasterReplicationTimestamp':
            continue
        account_ids.append(int(raw_name))
        pipe.get(raw_name)
    entries = map(ReportEntry.from_string, pipe.execute())
    return dict(zip(account_ids, entries))
def update_accounts_index(self, key):
    """Refresh the account-level index entry for key.account_id.

    Looks up the oldest folder heartbeat in the account's folder index and
    records (account_id, oldest_heartbeat) in the 'account_index' sorted
    set.
    """
    try:
        client = heartbeat_config.get_redis_client(self.host, self.port)
        # Find the oldest heartbeat from the account-folder index.
        f, oldest_heartbeat = client.zrange(key.account_id, 0, 0,
                                            withscores=True).pop()
        client.zadd('account_index', oldest_heartbeat, key.account_id)
    except IndexError:
        # Was a bare `except:`, which also swallowed redis/connection
        # errors and even KeyboardInterrupt. The stated intent is only:
        # if all heartbeats were deleted at the same time as this, the pop
        # will fail -- ignore it.
        pass
def update_accounts_index(self, key):
    """Refresh the account-level index entry for key.account_id (sharded).

    Uses the shard client for this account, finds the oldest folder
    heartbeat, and records it in the shard's 'account_index' sorted set.
    """
    try:
        client = heartbeat_config.get_redis_client(key.account_id)
        # Find the oldest heartbeat from the account-folder index.
        f, oldest_heartbeat = client.zrange(key.account_id, 0, 0,
                                            withscores=True).pop()
        client.zadd('account_index', oldest_heartbeat, key.account_id)
    except IndexError:
        # Was a bare `except:`, which also swallowed redis/connection
        # errors and even KeyboardInterrupt. The stated intent is only:
        # if all heartbeats were deleted at the same time as this, the pop
        # will fail -- ignore it.
        pass
def remove(self, key, device_id=None, client=None):
    """Remove a key from the store, or a single device entry from a key.

    :param key: HeartbeatStatusKey-style key identifying account/folder.
    :param device_id: if given, only that device's hash field is removed;
        the whole key (and its folder-index entry) goes away only when no
        other devices remain.
    :param client: optional redis client or pipeline to issue commands on.
        Defaults to the shard client for the key's account. Added for
        backward-compatible consistency with remove_folders(), which calls
        self.remove(key, device_id, pipeline) to batch removals.
    """
    if client is None:
        client = heartbeat_config.get_redis_client(key.account_id)
    if device_id:
        client.hdel(key, device_id)
        # If that was the only entry, also remove the key from the
        # folder index.
        devices = client.hkeys(key)
        if devices == [str(device_id)] or devices == []:
            self.remove_from_folder_index(key, client)
    else:
        client.delete(key)
        self.remove_from_folder_index(key, client)
def test_folder_publish_in_index(redis_client):
    """Publishing a heartbeat must populate the per-account folder index."""
    proxy = proxy_for(1, 2)
    proxy.publish()
    client = heartbeat_config.get_redis_client()
    all_keys = [key.decode() for key in client.keys()]
    assert "1" in all_keys
    # The per-account folder-list index should be a sorted set of all
    # folder IDs for that account, scored by each folder's last heartbeat
    # timestamp.
    index_entries = client.zrange("1", 0, -1, withscores=True)
    assert len(index_entries) == 1
    folder_id, heartbeat_ts = index_entries[0]
    assert folder_id.decode() == "2"
    assert fuzzy_equals(proxy.heartbeat_at, heartbeat_ts)
def has_contacts_and_events(account_id):
    """Return (contacts_present, events_present) booleans for an account.

    Each element is True when exactly one matching heartbeat key exists.
    Any failure is logged and reported as (False, False).
    """
    try:
        client = get_redis_client(STATUS_DATABASE)
        pipe = client.pipeline()
        pipe.keys(HeartbeatStatusKey.contacts(account_id))
        pipe.keys(HeartbeatStatusKey.events(account_id))
        contacts_keys, events_keys = pipe.execute()
        return (len(contacts_keys) == 1, len(events_keys) == 1)
    except Exception:
        log = get_logger()
        log.error('Error while reading the heartbeat status',
                  account_id=account_id,
                  exc_info=True)
        return (False, False)
def test_folder_publish_in_index(redis_client):
    """A published heartbeat lands in the account's folder index."""
    proxy = proxy_for(1, 2)
    proxy.publish()
    client = heartbeat_config.get_redis_client()
    assert '1' in client.keys()
    # The per-account folder index is a sorted set mapping each folder ID
    # to that folder's last heartbeat timestamp.
    entries = client.zrange('1', 0, -1, withscores=True)
    assert len(entries) == 1
    folder_id, heartbeat_ts = entries[0]
    assert folder_id == '2'
    assert fuzzy_equals(proxy.heartbeat_at, heartbeat_ts)
def clear_heartbeat_status(account_id, device_id=None):
    """Delete heartbeat entries for an account.

    With a device_id, only that device's field is removed from each folder
    key; otherwise whole keys are deleted. Failures are logged, never
    raised.
    """
    try:
        client = get_redis_client(STATUS_DATABASE)
        pipe = client.pipeline()
        pattern = HeartbeatStatusKey.all_folders(account_id)
        for folder_key in client.scan_iter(pattern, 100):
            if device_id:
                pipe.hdel(folder_key, device_id)
            else:
                pipe.delete(folder_key)
        pipe.execute()
    except Exception:
        log = get_logger()
        log.error('Error while deleting from the heartbeat status',
                  account_id=account_id,
                  device_id=(device_id or 'all'),
                  exc_info=True)
def get_accounts_folders(self, account_ids):
    """Fetch folder heartbeat indices for many accounts across shards.

    Preferred method of querying for multiple accounts: the ids are
    grouped per redis shard and each shard is queried with pipelines to
    reduce the number of round trips.
    """
    shard_num = heartbeat_config.account_redis_shard_number
    # Group account ids by the shard that owns them.
    by_shard = {}
    for account_id in account_ids:
        by_shard.setdefault(shard_num(account_id), []).append(account_id)
    results = dict()
    for shard_accounts in by_shard.values():
        if not shard_accounts:
            continue
        client = heartbeat_config.get_redis_client(shard_accounts[0])
        # Redis buffers pipelined replies, so issue the zrange calls in
        # bounded chunks instead of one enormous pipeline.
        for batch in chunk(shard_accounts, 10000):
            pipe = client.pipeline()
            for account_id in batch:
                pipe.zrange(account_id, 0, -1, withscores=True)
            replies = pipe.execute()
            for account_id, reply in zip(batch, replies):
                results[int(account_id)] = reply
    return results
def remove_folders(self, account_id, folder_id=None, device_id=None):
    """Remove heartbeats for the given account, folder and/or device.

    Returns the number of folder keys removed.
    """
    if folder_id:
        # Single-folder removal: delete it, then refresh the account's
        # oldest-heartbeat entry in the account index.
        key = HeartbeatStatusKey(account_id, folder_id)
        self.remove(key, device_id)
        self.update_accounts_index(key)
        return 1  # 1 item removed
    # Account-wide removal: drop every folder timestamp and, unless only a
    # single device is being cleared, the account-level indices too.
    pattern = HeartbeatStatusKey.all_folders(account_id)
    client = heartbeat_config.get_redis_client(account_id)
    pipeline = client.pipeline()
    removed = 0
    for key in client.scan_iter(pattern, 100):
        self.remove(key, device_id, pipeline)
        removed += 1
    if not device_id:
        self.remove_from_account_index(account_id, pipeline)
    pipeline.execute()
    pipeline.reset()
    return removed
def get_accounts_folders(self, account_ids):
    """Query folder heartbeat indices for many accounts across shards.

    Account ids are grouped per shard, and each shard's requests are
    pipelined in bounded batches to keep round trips (and redis reply
    buffering) under control.
    """
    shard_num = heartbeat_config.account_redis_shard_number
    # Group the (sorted) account ids by shard number.
    # http://stackoverflow.com/questions/8793772/how-to-split-a-sequence-according-to-a-predicate
    grouped = [
        list(members)
        for _, members in itertools.groupby(
            sorted(account_ids, key=shard_num), key=shard_num)
    ]
    results = {}
    for group in grouped:
        if not group:
            continue
        client = heartbeat_config.get_redis_client(group[0])
        # Cap each pipeline's size so redis never buffers an impossibly
        # large response.
        for subset in chunk(group, 10000):
            pipe = client.pipeline()
            for acct in subset:
                pipe.zrange(acct, 0, -1, withscores=True)
            for acct, reply in zip(subset, pipe.execute()):
                results[int(acct)] = reply
    return results
def __init__(self, host=None, port=6379):
    # Single redis client shared by this instance's queries.
    # NOTE(review): presumably get_redis_client falls back to a configured
    # default when host is None -- confirm against its definition.
    self.client = get_redis_client(host, port)
def get_heartbeat_status(host=None, port=6379, account_id=None):
    """Assemble sync-heartbeat liveness status from redis.

    Returns a dict mapping account_id -> (account_alive, provider_name,
    folders), where folders maps folder_id -> (folder_alive, folder_name,
    devices) and devices maps device_id -> per-device status dict.

    If *host* is given, connects directly to it; otherwise uses the
    configured default client and thresholds. If *account_id* is given,
    only that account's keys are scanned.
    """
    if host:
        thresholds = _get_alive_thresholds()
        client = _get_redis_client(host, port, STATUS_DATABASE)
    else:
        thresholds = get_alive_thresholds()
        client = get_redis_client(STATUS_DATABASE)
    # Collect every heartbeat hash in one pipelined pass.
    batch_client = client.pipeline()
    keys = []
    match_key = None
    if account_id:
        match_key = HeartbeatStatusKey.all_folders(account_id)
    for k in client.scan_iter(match=match_key, count=100):
        # Skip ElastiCache's internal replication bookkeeping key.
        if k == 'ElastiCacheMasterReplicationTimestamp':
            continue
        batch_client.hgetall(k)
        keys.append(k)
    values = batch_client.execute()
    now = datetime.utcnow()
    accounts = {}
    for (k, v) in zip(keys, values):
        key = HeartbeatStatusKey.from_string(k)
        # Accumulate into the existing per-account / per-folder entries,
        # starting from "alive" so liveness is AND-ed across devices.
        account_alive, provider_name, folders = accounts.get(
            key.account_id, (True, '', {}))
        folder_alive, folder_name, devices = folders.get(
            key.folder_id, (True, '', {}))
        for device_id in v:
            value = json.loads(v[device_id])
            provider_name = value['provider_name']
            folder_name = value['folder_name']
            heartbeat_at = datetime.strptime(value['heartbeat_at'],
                                             '%Y-%m-%d %H:%M:%S.%f')
            state = value.get('state', None)
            action = value.get('action', None)
            # Each heartbeat type has its own staleness threshold.
            if key.folder_id == -1:
                # contacts
                device_alive = (now - heartbeat_at) < thresholds.contacts
            elif key.folder_id == -2:
                # events
                device_alive = (now - heartbeat_at) < thresholds.events
            elif provider_name == 'eas' and action == 'ping':
                # eas w/ ping
                device_alive = (now - heartbeat_at) < thresholds.eas
            else:
                device_alive = (now - heartbeat_at) < thresholds.base
            # A device is only alive in a known-good sync state.
            device_alive = device_alive and \
                (state in set([None, 'initial', 'poll']))
            devices[int(device_id)] = {'heartbeat_at': str(heartbeat_at),
                                       'state': state,
                                       'action': action,
                                       'alive': device_alive}
            # a folder is alive if and only if all the devices handling that
            # folder are alive
            folder_alive = folder_alive and device_alive
        folders[key.folder_id] = (folder_alive, folder_name, devices)
        # an account is alive if and only if all the folders of the account
        # are alive
        account_alive = account_alive and folder_alive
        accounts[key.account_id] = (account_alive, provider_name, folders)
    return accounts
def get_account_folders(self, account_id):
    """Return the (folder_id, timestamp) entries for one account's index."""
    shard_client = heartbeat_config.get_redis_client(account_id)
    return shard_client.zrange(account_id, 0, -1, withscores=True)
def update_folder_index(self, key, timestamp):
    """Record *timestamp* for key.folder_id in the account's folder index."""
    assert isinstance(timestamp, float)
    # Update the per-account sorted set, which scores each folder by its
    # latest heartbeat time.
    shard_client = heartbeat_config.get_redis_client(key.account_id)
    shard_client.zadd(key.account_id, timestamp, key.folder_id)
def get_index(self, index):
    """Return every member of the given index together with its score."""
    conn = heartbeat_config.get_redis_client(self.host, self.port)
    return conn.zrange(index, 0, -1, withscores=True)