async def get_user_feed(account: str, skip: int, limit: int, context: str = None):
    """Return one page of posts from `account`'s feed, newest first.

    `skip` and `limit` are bound to the query's OFFSET and LIMIT, and
    `context` is forwarded unchanged to `_get_posts`.

    Each returned post gains a `reblogged_by` list when at least one
    followed account other than the post's author has it in their feed
    cache. An empty page yields an empty list.
    """
    account_id = _get_account_id(account)
    sql = (
        " SELECT post_id, string_agg(name, ',') accounts"
        " FROM hive_feed_cache"
        " JOIN hive_follows ON account_id = hive_follows.following AND state = 1"
        " JOIN hive_accounts ON hive_follows.following = hive_accounts.id"
        " WHERE hive_follows.follower = :account"
        " GROUP BY post_id"
        " ORDER BY MIN(hive_feed_cache.created_at) DESC"
        " LIMIT :limit OFFSET :skip "
    )
    rows = query_all(sql, account=account_id, skip=skip, limit=limit)

    # Nothing on this page: do not hand an empty id list to `_get_posts`,
    # which would end up as an `IN ()` lookup.
    if not rows:
        return []

    posts = _get_posts([row[0] for row in rows], context)

    # Merge reblogged_by data into result set
    accounts_by_post = dict(rows)
    for post in posts:
        rebloggers = set(accounts_by_post[post['post_id']].split(','))
        rebloggers.discard(post['author'])
        if rebloggers:
            post['reblogged_by'] = list(rebloggers)

    return posts
def pids_by_feed_with_reblog(account: str, start_author: str = '', start_permlink: str = '', limit: int = 20):
    """Get a list of (post_id, accounts_csv) rows for an account's feed.

    Rows are ordered newest first by the earliest feed-cache entry of each
    post. When `start_permlink` is given, the listing is restricted to
    posts whose earliest entry is no later than that of the start post.
    If the start post cannot be resolved to an id, an empty list is
    returned instead of failing while building the query.
    """
    account_id = get_account_id(account)

    seek = ''
    if start_permlink:
        start_id = get_post_id(start_author, start_permlink)
        # A falsy id (presumably "post not found" -- confirm against
        # get_post_id) cannot be formatted with %d below.
        if not start_id:
            return []
        seek = (
            " HAVING MIN(hive_feed_cache.created_at) <= ("
            " SELECT MIN(created_at) FROM hive_feed_cache"
            " WHERE post_id = %d"
            " AND account_id IN (SELECT following FROM hive_follows"
            " WHERE follower = :account AND state = 1)) "
        ) % start_id

    sql = (
        " SELECT post_id, string_agg(name, ',') accounts"
        " FROM hive_feed_cache"
        " JOIN hive_follows ON account_id = hive_follows.following AND state = 1"
        " JOIN hive_accounts ON hive_follows.following = hive_accounts.id"
        " WHERE hive_follows.follower = :account"
        " GROUP BY post_id %s"
        " ORDER BY MIN(hive_feed_cache.created_at) DESC"
        " LIMIT :limit "
    ) % seek

    return query_all(sql, account=account_id, limit=limit)
def load_posts(ids, truncate_body=0):
    """Given an array of post ids, returns full objects in the same order.

    Ids with no row in `hive_posts_cache` are logged and left out of the
    result. The caller's `ids` is never modified, so lists, tuples and
    other iterables are all accepted. `truncate_body` is forwarded to
    `_condenser_post_object`.
    """
    if not ids:
        return []

    # Work on a private copy: the input must not be mutated, and it may be
    # a tuple or a one-shot iterable.
    ids = list(ids)

    sql = (
        " SELECT post_id, author, permlink, title, body, promoted, payout,"
        " created_at, payout_at, is_paidout, rshares, raw_json, category,"
        " depth, json, children, votes, author_rep, updated_at,"
        " preview, img_url, is_nsfw"
        " FROM hive_posts_cache WHERE post_id IN :ids "
    )

    # key by id so we can return sorted by input order
    posts_by_id = {}
    for row in query_all(sql, ids=tuple(ids)):
        row = dict(row)
        post = _condenser_post_object(row, truncate_body=truncate_body)
        posts_by_id[row['post_id']] = post

    # in rare cases of cache inconsistency, recover and warn
    missed = set(ids) - posts_by_id.keys()
    if missed:
        log.warning("get_posts do not exist in cache: %s", repr(missed))
        sql = (
            "SELECT id, author, permlink, depth, created_at, is_deleted "
            "FROM hive_posts WHERE id = :id")
        for _id in missed:
            row = query_row(sql, id=_id)
            # The id may be absent from hive_posts as well; log that
            # instead of crashing on dict(None).
            log.warning("missing: %s", dict(row) if row is not None else None)

    # Filtering (rather than removing from `ids`) also copes with a
    # missing id that appears more than once in the input.
    return [posts_by_id[_id] for _id in ids if _id in posts_by_id]
def _get_posts(ids): if not ids: raise Exception("no ids provided") sql = """ SELECT post_id, author, permlink, title, body, promoted, payout, created_at, payout_at, is_paidout, rshares, raw_json, category, depth, json, children, votes, author_rep, preview, img_url, is_nsfw FROM hive_posts_cache WHERE post_id IN :ids """ # key by id so we can return sorted by input order posts_by_id = {} for row in query_all(sql, ids=tuple(ids)): row = dict(row) post = _condenser_post_object(row) posts_by_id[row['post_id']] = post # in rare cases of cache inconsistency, recover and warn missed = set(ids) - posts_by_id.keys() if missed: print("WARNING: get_posts do not exist in cache: {}".format(missed)) for _id in missed: ids.remove(_id) return [posts_by_id[_id] for _id in ids]
def pids_by_feed_with_reblog(account: str, start_author: str = '', start_permlink: str = '', limit: int = 20):
    """Get a list of [post_id, reblogged_by_str] for an account's feed.

    Only feed-cache entries newer than `last_month()` are considered.
    Results are ordered newest first; `start_permlink` (together with
    `start_author`) selects the post to continue from, and an unknown
    start post produces an empty list.
    """
    account_id = _get_account_id(account)

    # Optional HAVING clause used to resume the listing at the start post.
    having = ''
    if start_permlink:
        start_id = _get_post_id(start_author, start_permlink)
        if not start_id:
            return []
        having_template = (
            " HAVING MIN(hive_feed_cache.created_at) <= ("
            " SELECT MIN(created_at) FROM hive_feed_cache"
            " WHERE post_id = %d"
            " AND account_id IN (SELECT following FROM hive_follows"
            " WHERE follower = :account AND state = 1)) "
        )
        having = having_template % start_id

    select_part = (
        " SELECT post_id, string_agg(name, ',') accounts"
        " FROM hive_feed_cache"
        " JOIN hive_follows ON account_id = hive_follows.following AND state = 1"
        " JOIN hive_accounts ON hive_follows.following = hive_accounts.id"
        " WHERE hive_follows.follower = :account"
        " AND hive_feed_cache.created_at > :cutoff"
        " GROUP BY post_id "
    )
    order_part = (
        " ORDER BY MIN(hive_feed_cache.created_at) DESC"
        " LIMIT :limit "
    )
    statement = select_part + having + order_part

    return query_all(statement, account=account_id, limit=limit, cutoff=last_month())
async def get_trending_tags(start_tag: str = '', limit: int = 250):
    """Get top 250 trending tags among pending posts, with stats.

    Pagination is not supported: `start_tag` must be empty and `limit`
    must be exactly 250, otherwise `AssertionError` is raised.

    Returns a list of dicts with keys `comments`, `name`, `top_posts` and
    `total_payouts`, ordered by total pending payout, highest first.
    """
    # Raised explicitly rather than via `assert`, so the checks still run
    # when Python is started with -O; the exception type is unchanged.
    if start_tag != '':
        raise AssertionError('tags pagination not supported')
    if limit != 250:
        raise AssertionError('only returns exactly 250 tags')

    sql = (
        " SELECT category, COUNT(*) AS total_posts,"
        " SUM(CASE WHEN depth = 0 THEN 1 ELSE 0 END) AS top_posts,"
        " SUM(payout) AS total_payouts"
        " FROM hive_posts_cache"
        " WHERE is_paidout = '0'"
        " GROUP BY category"
        " ORDER BY SUM(payout) DESC"
        " LIMIT 250 "
    )

    return [
        {
            # every post in the category that is not a root post
            'comments': row['total_posts'] - row['top_posts'],
            'name': row['category'],
            'top_posts': row['top_posts'],
            'total_payouts': "%.3f SBD" % row['total_payouts'],
        }
        for row in query_all(sql)
    ]
def load_accounts(names):
    """`get_accounts`-style lookup for `get_state` compat layer.

    Looks up the given account names in `hive_accounts` and returns one
    `_condenser_account_object` result per row found. An empty `names`
    yields an empty list without touching the database.
    """
    # Materialize once: supports one-shot iterables and lets us detect the
    # empty case, which would otherwise become an invalid `IN ()`.
    names = tuple(names)
    if not names:
        return []

    sql = (
        "SELECT id, name, display_name, about, reputation, vote_weight,"
        " created_at, post_count, profile_image, location, website,"
        " cover_image FROM hive_accounts WHERE name IN :names"
    )
    rows = query_all(sql, names=names)
    return [_condenser_account_object(row) for row in rows]
async def get_following(account: str, skip: int, limit: int):
    """List the accounts that `account` follows, most recent follow first.

    `skip` and `limit` are coerced to int and bound to OFFSET / LIMIT.
    Returns a list of `[name, created_at_str]` pairs, where the second
    item is `str()` of the follow's `created_at` value.
    """
    account_id = _get_account_id(account)

    # Both columns consumed below must be selected; selecting `name`
    # alone made the `r[1]` lookup fail for every row.
    sql = (
        " SELECT name, hf.created_at"
        " FROM hive_follows hf"
        " JOIN hive_accounts ON hf.following = id"
        " WHERE hf.follower = :account_id AND state = 1"
        " ORDER BY hf.created_at DESC"
        " LIMIT :limit OFFSET :skip "
    )
    rows = query_all(sql, account_id=account_id, skip=int(skip), limit=int(limit))
    return [[row[0], str(row[1])] for row in rows]
def register(cls, names, block_date):
    """Insert any not-yet-known account names and record their new ids.

    Names for which `cls.exists` is true are skipped. Each remaining name
    is inserted into `hive_accounts` with `block_date` as `created_at`,
    then the resulting (name, id) pairs are merged into `cls._ids`.
    """
    candidates = [name for name in set(names) if not cls.exists(name)]
    if not candidates:
        return

    # insert new names and add the new ids to our mem map
    insert_sql = ("INSERT INTO hive_accounts (name, created_at) "
                  "VALUES (:name, :date)")
    for name in candidates:
        query(insert_sql, name=name, date=block_date)

    lookup_sql = "SELECT name, id FROM hive_accounts WHERE name IN :names"
    merged = dict(query_all(lookup_sql, names=tuple(candidates)))
    # entries already present in the map take precedence over fresh rows
    merged.update(cls._ids)
    cls._ids = merged
def _load_accounts(names): sql = "SELECT id,name,display_name,about,reputation FROM hive_accounts WHERE name IN :names" accounts = [] for row in query_all(sql, names=tuple(names)): account = {} account['name'] = row['name'] account['reputation'] = _rep_to_raw(row['reputation']) account['json_metadata'] = json.dumps( {'profile': { 'name': row['display_name'], 'about': row['about'] }}) accounts.append(account) return accounts
def _get_posts(ids, context=None): sql = """ SELECT post_id, author, permlink, title, preview, img_url, payout, promoted, created_at, payout_at, is_nsfw, rshares, votes, json FROM hive_posts_cache WHERE post_id IN :ids """ reblogged_ids = [] if context: reblogged_ids = query_col( "SELECT post_id FROM hive_reblogs " "WHERE account = :a AND post_id IN :ids", a=context, ids=tuple(ids)) # key by id so we can return sorted by input order posts_by_id = {} for row in query_all(sql, ids=tuple(ids)): obj = dict(row) if context: voters = [csa.split(",")[0] for csa in obj['votes'].split("\n")] obj['user_state'] = { 'reblogged': row['post_id'] in reblogged_ids, 'voted': context in voters } # TODO: Object of type 'Decimal' is not JSON serializable obj['payout'] = float(obj['payout']) obj['promoted'] = float(obj['promoted']) # TODO: Object of type 'datetime' is not JSON serializable obj['created_at'] = str(obj['created_at']) obj['payout_at'] = str(obj['payout_at']) obj.pop('votes') # temp obj.pop('json') # temp posts_by_id[row['post_id']] = obj # in rare cases of cache inconsistency, recover and warn missed = set(ids) - posts_by_id.keys() if missed: print("WARNING: _get_posts do not exist in cache: {}".format(missed)) for _id in missed: ids.remove(_id) return [posts_by_id[_id] for _id in ids]
async def _get_trending_tags():
    """Return stats for the 250 categories with the highest pending payout.

    Only posts with `is_paidout = '0'` are counted. Each entry is a dict
    with keys `comments`, `name`, `top_posts` and `total_payouts`.
    """
    statement = (
        " SELECT category, COUNT(*) AS total_posts,"
        " SUM(CASE WHEN depth = 0 THEN 1 ELSE 0 END) AS top_posts,"
        " SUM(payout) AS total_payouts"
        " FROM hive_posts_cache"
        " WHERE is_paidout = '0'"
        " GROUP BY category"
        " ORDER BY SUM(payout) DESC"
        " LIMIT 250 "
    )
    return [
        {
            # posts in the category that are not root posts
            'comments': tag['total_posts'] - tag['top_posts'],
            'name': tag['category'],
            'top_posts': tag['top_posts'],
            'total_payouts': "%.3f SBD" % tag['total_payouts'],
        }
        for tag in query_all(statement)
    ]
async def get_trending_tags(start_tag: str = '', limit: int = 250):
    """Get top 250 trending tags among pending posts, with stats.

    `limit` is passed through `valid_limit` (upper bound 250) and
    `start_tag` through `valid_tag` (empty allowed). With a start tag,
    only categories whose pending payout total does not exceed that
    tag's total are listed.
    """
    limit = valid_limit(limit, ubound=250)
    start_tag = valid_tag(start_tag or '', allow_empty=True)

    # Optional HAVING clause used to resume the listing at `start_tag`.
    having = (
        " HAVING SUM(payout) <= ("
        " SELECT SUM(payout) FROM hive_posts_cache"
        " WHERE is_paidout = '0' AND category = :start_tag) "
    ) if start_tag else ''

    select_part = (
        " SELECT category, COUNT(*) AS total_posts,"
        " SUM(CASE WHEN depth = 0 THEN 1 ELSE 0 END) AS top_posts,"
        " SUM(payout) AS total_payouts"
        " FROM hive_posts_cache"
        " WHERE is_paidout = '0'"
        " GROUP BY category "
    )
    order_part = (
        " ORDER BY SUM(payout) DESC"
        " LIMIT :limit "
    )
    statement = select_part + having + order_part

    return [
        {
            'name': tag['category'],
            # posts in the category that are not root posts
            'comments': tag['total_posts'] - tag['top_posts'],
            'top_posts': tag['top_posts'],
            'total_payouts': "%.3f SBD" % tag['total_payouts'],
        }
        for tag in query_all(statement, limit=limit, start_tag=start_tag)
    ]
async def get_discussions_by_feed(tag: str, start_author: str = '', start_permlink: str = '', limit: int = 20):
    """Return posts from the feed of the account named by `tag`.

    Posts are ordered newest first. `start_author` / `start_permlink`
    identify the post to continue from; if that post cannot be resolved
    to an id, or the feed page is empty, an empty list is returned.

    Each post gains a `reblogged_by` list when at least one followed
    account other than the post's author has it in their feed cache.
    """
    limit = _validate_limit(limit, 20)
    account_id = _get_account_id(tag)

    seek = ''
    if start_permlink:
        start_id = _get_post_id(start_author, start_permlink)
        # A falsy id (presumably "post not found" -- confirm against
        # _get_post_id) cannot be formatted with %d below.
        if not start_id:
            return []
        seek = (
            " HAVING MIN(hive_feed_cache.created_at) <= ("
            " SELECT MIN(created_at) FROM hive_feed_cache"
            " WHERE post_id = %d"
            " AND account_id IN (SELECT following FROM hive_follows"
            " WHERE follower = :account AND state = 1)) "
        ) % start_id

    sql = (
        " SELECT post_id, string_agg(name, ',') accounts"
        " FROM hive_feed_cache"
        " JOIN hive_follows ON account_id = hive_follows.following AND state = 1"
        " JOIN hive_accounts ON hive_follows.following = hive_accounts.id"
        " WHERE hive_follows.follower = :account"
        " GROUP BY post_id %s"
        " ORDER BY MIN(hive_feed_cache.created_at) DESC"
        " LIMIT :limit "
    ) % seek

    rows = query_all(sql, account=account_id, limit=limit)

    # Nothing on this page: do not hand an empty id list to `_get_posts`.
    if not rows:
        return []

    posts = _get_posts([row[0] for row in rows])

    # Merge reblogged_by data into result set
    accounts_by_post = dict(rows)
    for post in posts:
        rebloggers = set(accounts_by_post[post['post_id']].split(','))
        rebloggers.discard(post['author'])
        if rebloggers:
            post['reblogged_by'] = list(rebloggers)

    return posts
def update_ranks(cls):
    """Recompute `hive_accounts.rank` from `vote_weight` in one statement.

    Ranks are row numbers over accounts ordered by `vote_weight`
    descending, so rank 1 is the highest weight. Only rows whose stored
    rank differs from the computed one are written.
    """
    # A single set-based UPDATE is used on purpose: the per-row UPDATE
    # loop that used to follow the `return` was unreachable and noted as
    # 10-20x slower, so it has been removed.
    sql = (
        " UPDATE hive_accounts"
        " SET rank = r.rnk"
        " FROM (SELECT id, ROW_NUMBER() OVER (ORDER BY vote_weight DESC) as rnk"
        " FROM hive_accounts) r"
        " WHERE hive_accounts.id = r.id AND rank != r.rnk; "
    )
    query(sql)
def register(cls, names, block_date):
    """Block processing: register "candidate" names.

    Account creation can come from four ops: *account_create*,
    *account_create_with_delegation*, *pow*, and *pow2*. A *pow* op only
    creates an account when the name it carries does not exist yet, which
    is why already-registered names are dropped here before inserting.
    """
    # keep only the names that are not registered yet
    unregistered = [name for name in set(names) if not cls.exists(name)]
    if not unregistered:
        return

    insert_sql = ("INSERT INTO hive_accounts (name, created_at) "
                  "VALUES (:name, :date)")
    for name in unregistered:
        query(insert_sql, name=name, date=block_date)

    # fetch the ids just assigned and fold them into the in-memory map;
    # entries already in the map take precedence
    select_sql = "SELECT name, id FROM hive_accounts WHERE name IN :names"
    id_map = dict(query_all(select_sql, names=tuple(unregistered)))
    id_map.update(cls._ids)
    cls._ids = id_map
def update_ranks(cls):
    """Recompute `hive_accounts.rank` from `vote_weight` in one statement.

    The statement numbers accounts in `vote_weight` descending order with
    a `@rownum` user variable (MySQL-style syntax) and writes that number
    to each account's `rank`.
    """
    # A single set-based UPDATE is used on purpose: the per-row UPDATE
    # loop that used to follow the `return` was unreachable and noted as
    # 10-20x slower, so it has been removed.
    sql = (
        " UPDATE hive_accounts"
        " JOIN ("
        " SELECT id, @rownum:=@rownum+1 rank"
        " FROM hive_accounts, (SELECT @rownum:=0) r"
        " ORDER BY vote_weight DESC"
        " ) ranks USING(id)"
        " SET hive_accounts.rank = ranks.rank "
    )
    query(sql)
def _load_accounts(names): """`get_accounts`-style lookup for `get_state` compat layer.""" sql = """SELECT id, name, display_name, about, reputation FROM hive_accounts WHERE name IN :names""" rows = query_all(sql, names=tuple(names)) return [_condenser_account(row) for row in rows]
def _load_accounts(names): sql = """SELECT id, name, display_name, about, reputation FROM hive_accounts WHERE name IN :names""" rows = query_all(sql, names=tuple(names)) return [_condenser_account(row) for row in rows]
def load_ids(cls):
    """Populate `cls._ids` with a name -> id map of every `hive_accounts` row.

    Asserts that the map is still empty, i.e. that it has not been loaded
    before.
    """
    assert not cls._ids, "id map only needs to be loaded once"
    name_id_rows = query_all("SELECT name, id FROM hive_accounts")
    cls._ids = dict(name_id_rows)
def load_ids(cls):
    """Read every (name, id) pair from `hive_accounts` into `cls._ids`.

    The map must be empty when this is called; a non-empty map trips the
    assertion.
    """
    assert not cls._ids, "id map already loaded"
    account_rows = query_all("SELECT name, id FROM hive_accounts")
    cls._ids = dict(account_rows)