def follow_op(cls, account, op_json, date):
    """Process a follow/unfollow op: validate it, persist the new state,
    and (outside initial sync) queue follower-count deltas.

    :param account: account that signed the op (used for validation)
    :param op_json: raw op payload
    :param date: block timestamp
    """
    op = cls._validated_op(account, op_json, date)
    if not op:
        return  # op failed validation; nothing to do

    # perform delta check — skip no-op state transitions.
    # NOTE: `old_state or 0` treats "no row yet" the same as state 0.
    new_state = op['state']
    old_state = cls._get_follow_db_state(op['flr'], op['flg'])
    if new_state == (old_state or 0):
        return

    # insert or update state; None old_state means no row exists yet
    if old_state is None:
        sql = """INSERT INTO hive_follows (follower, following, created_at, state)
                 VALUES (:flr, :flg, :at, :state)"""
    else:
        sql = """UPDATE hive_follows SET state = :state
                 WHERE follower = :flr AND following = :flg"""
    # op carries all named params (:flr, :flg, :at, :state); extras are ignored
    query(sql, **op)

    # track count deltas in memory (flushed in bulk elsewhere);
    # skipped during initial sync, where counts are rebuilt wholesale
    if not DbState.is_initial_sync():
        if new_state == 1:
            Follow.follow(op['flr'], op['flg'])
        if old_state == 1:
            Follow.unfollow(op['flr'], op['flg'])
def insert(cls, post_id, account_id, created_at):
    """Inserts a [re-]post by an account into feed."""
    # feed cache is only written in live mode; initial sync rebuilds it wholesale
    assert not DbState.is_initial_sync(), 'writing to feed cache in sync'
    query("""INSERT INTO hive_feed_cache (account_id, post_id, created_at)
             VALUES (:account_id, :id, :created_at)
             ON CONFLICT (account_id, post_id) DO NOTHING""",
          account_id=account_id, id=post_id, created_at=created_at)
def rebuild(cls, truncate=True):
    """Regenerate the feed cache from all root posts and all reblogs.

    :param truncate: when True, wipe the existing cache first
    """
    print("[HIVE] Rebuilding feed cache, this will take a few minutes.")
    query("START TRANSACTION")
    if truncate:
        query("TRUNCATE TABLE hive_feed_cache")

    t_start = time.perf_counter()
    # pass 1: every non-deleted root post, keyed by its author's account id
    query("""
        INSERT INTO hive_feed_cache (account_id, post_id, created_at)
             SELECT hive_accounts.id, hive_posts.id, hive_posts.created_at
               FROM hive_posts
               JOIN hive_accounts ON hive_posts.author = hive_accounts.name
              WHERE depth = 0 AND is_deleted = '0'
        ON CONFLICT DO NOTHING
    """)
    t_posts = time.perf_counter()
    # pass 2: every reblog, keyed by the reblogging account's id
    query("""
        INSERT INTO hive_feed_cache (account_id, post_id, created_at)
             SELECT hive_accounts.id, post_id, hive_reblogs.created_at
               FROM hive_reblogs
               JOIN hive_accounts ON hive_reblogs.account = hive_accounts.name
        ON CONFLICT DO NOTHING
    """)
    t_reblogs = time.perf_counter()
    query("COMMIT")

    print("[HIVE] Rebuilt hive feed cache in {}s ({}+{})".format(
        int(t_reblogs - t_start),
        int(t_posts - t_start),
        int(t_reblogs - t_posts)))
def process_blocks(blocks, is_initial_sync=False):
    """Process a batch of blocks inside one transaction.

    Returns the union of dirty-post sets reported by each block.
    """
    touched = set()
    query("START TRANSACTION")
    for block in blocks:
        touched |= process_block(block, is_initial_sync)
    query("COMMIT")
    return touched
def register_posts(ops, date):
    """Register new posts from comment ops, ignoring edits.

    Fix: the original built every SQL statement with %-string interpolation
    of op-supplied author/permlink values — a SQL injection vector. All
    queries are now parameterized; this also replaces the `'NULL'` string
    hack for parent_id with a proper NULL bind.

    :param ops: iterable of comment-op dicts (author, permlink, parent_*)
    :param date: block timestamp used as created_at
    """
    for op in ops:
        # a row already existing means this op is an edit, not a new post
        is_edit = query_one(
            "SELECT 1 FROM hive_posts WHERE author = :author AND permlink = :permlink",
            author=op['author'], permlink=op['permlink'])
        if is_edit:
            continue  # ignore edits to posts

        # TODO: this method needs to perform auth checking e.g. is
        # op.author authorized to post in op.community?
        community_or_blog = create_post_as(op) or op['author']

        if op['parent_author'] == '':
            # root post: no parent, category comes from parent_permlink
            parent_id = None
            depth = 0
            category = op['parent_permlink']
        else:
            # comment: nest under parent and inherit its category
            parent_data = first(query(
                "SELECT id, depth, category FROM hive_posts "
                "WHERE author = :author AND permlink = :permlink",
                author=op['parent_author'], permlink=op['parent_permlink']))
            parent_id, parent_depth, category = parent_data
            depth = parent_depth + 1

        query(
            "INSERT INTO hive_posts (parent_id, author, permlink, category, "
            "community, depth, created_at) VALUES (:parent_id, :author, "
            ":permlink, :category, :community, :depth, :date)",
            parent_id=parent_id, author=op['author'], permlink=op['permlink'],
            category=category, community=community_or_blog, depth=depth,
            date=date)
def reblog(cls, account, op_json, block_date):
    """Handle a reblog (or un-reblog) custom_json operation."""
    blogger = op_json['account']
    author = op_json['author']
    permlink = op_json['permlink']

    # the op must act on behalf of the account that signed it
    if blogger != account:
        return  # impersonation
    if not Accounts.exists(author) or not Accounts.exists(blogger):
        return

    post_id, depth = Posts.get_id_and_depth(author, permlink)
    if depth > 0:
        return  # prevent comment reblogs
    if not post_id:
        print("reblog: post not found: {}/{}".format(author, permlink))
        return

    if op_json.get('delete') == 'delete':
        # un-reblog: remove the row and, in live mode, its feed cache entry
        query("DELETE FROM hive_reblogs WHERE account = :a AND "
              "post_id = :pid LIMIT 1", a=blogger, pid=post_id)
        if not DbState.is_initial_sync():
            FeedCache.delete(post_id, Accounts.get_id(blogger))
    else:
        # reblog: idempotent insert; feed cache updated only in live mode
        query("INSERT INTO hive_reblogs (account, post_id, created_at) "
              "VALUES (:a, :pid, :date) ON CONFLICT (account, post_id) DO NOTHING",
              a=blogger, pid=post_id, date=block_date)
        if not DbState.is_initial_sync():
            FeedCache.insert(post_id, Accounts.get_id(blogger), block_date)
def listen(cls):
    """Live-sync loop: stream blocks from steemd and process each in turn.

    Each block is handled in its own transaction alongside follow,
    account, and post-cache flushes. Runs until the stream ends.
    """
    trail_blocks = Conf.get('trail_blocks')
    assert trail_blocks >= 0
    assert trail_blocks < 25
    steemd = SteemClient.instance()
    hive_head = Blocks.head_num()
    for block in steemd.stream_blocks(hive_head + 1, trail_blocks, max_gap=40):
        start_time = time.perf_counter()

        # per-block transaction: block rows + all derived-state flushes commit together
        query("START TRANSACTION")
        num = Blocks.process(block)
        follows = Follow.flush(trx=False)
        accts = Accounts.flush(trx=False, period=8)
        CachedPost.dirty_paidouts(block['timestamp'])
        cnt = CachedPost.flush(trx=False)
        query("COMMIT")

        ms = (time.perf_counter() - start_time) * 1000
        print("[LIVE] Got block %d at %s --% 4d txs,% 3d posts,% 3d edits,"
              "% 3d payouts,% 3d votes,% 3d accounts,% 3d follows --% 5dms%s"
              % (num, block['timestamp'], len(block['transactions']),
                 cnt['insert'], cnt['update'], cnt['payout'], cnt['upvote'],
                 accts, follows, int(ms), ' SLOW' if ms > 1000 else ''))

        # once per hour, update accounts
        # (1200 blocks ~= 1 hour, assuming a 3s block interval — TODO confirm)
        if num % 1200 == 0:
            Accounts.dirty_oldest(10000)
            Accounts.flush(trx=True)
            #Accounts.update_ranks()

        # once a minute, update chain props
        if num % 20 == 0:
            cls._update_chain_state(steemd)
def rebuild(cls, truncate=True):
    """Rebuilds the feed cache upon completion of initial sync."""
    log.info("[HIVE] Rebuilding feed cache, this will take a few minutes.")
    query("START TRANSACTION")
    if truncate:
        query("TRUNCATE TABLE hive_feed_cache")

    t_start = time.perf_counter()
    # pass 1: all non-deleted root posts, one entry per author
    query("""
        INSERT INTO hive_feed_cache (account_id, post_id, created_at)
             SELECT hive_accounts.id, hive_posts.id, hive_posts.created_at
               FROM hive_posts
               JOIN hive_accounts ON hive_posts.author = hive_accounts.name
              WHERE depth = 0 AND is_deleted = '0'
        ON CONFLICT DO NOTHING
    """)
    t_posts = time.perf_counter()
    # pass 2: all reblogs, one entry per reblogging account
    query("""
        INSERT INTO hive_feed_cache (account_id, post_id, created_at)
             SELECT hive_accounts.id, post_id, hive_reblogs.created_at
               FROM hive_reblogs
               JOIN hive_accounts ON hive_reblogs.account = hive_accounts.name
        ON CONFLICT DO NOTHING
    """)
    t_reblogs = time.perf_counter()
    query("COMMIT")

    log.info("[HIVE] Rebuilt hive feed cache in %ds (%d+%d)",
             (t_reblogs - t_start), (t_posts - t_start), (t_reblogs - t_posts))
def delete(cls, op):
    """Mark a post as deleted and, in live mode, purge its cache entries."""
    pid, depth = cls.get_id_and_depth(op['author'], op['permlink'])
    query("UPDATE hive_posts SET is_deleted = '1' WHERE id = :id", id=pid)
    if DbState.is_initial_sync():
        return  # caches are rebuilt wholesale after initial sync
    CachedPost.delete(pid, op['author'], op['permlink'])
    if depth == 0:
        # only root posts live in the feed cache
        FeedCache.delete(pid)
def update_follows(cls, accounts):
    """Recount follower/following totals for the given account names."""
    # correlated subqueries recount from hive_follows; only active (state=1) rows
    query("""
        UPDATE hive_accounts
           SET followers = (SELECT COUNT(*) FROM hive_follows
                             WHERE state = 1 AND following = hive_accounts.name),
               following = (SELECT COUNT(*) FROM hive_follows
                             WHERE state = 1 AND follower = hive_accounts.name)
         WHERE name IN :names
    """, names=tuple(accounts))
def register_accounts(accounts, date):
    """Insert any account names not yet present in hive_accounts."""
    for name in set(accounts):
        if get_account_id(name):
            continue  # already registered
        query("INSERT INTO hive_accounts (name, created_at) "
              "VALUES (:name, :date)", name=name, date=date)
def update_ranks(cls):
    """Assign each account its rank by descending vote_weight."""
    # only rows whose rank actually changed are touched (rank != r.rnk)
    query("""
        UPDATE hive_accounts
           SET rank = r.rnk
          FROM (SELECT id, ROW_NUMBER() OVER (ORDER BY vote_weight DESC) as rnk
                  FROM hive_accounts) r
         WHERE hive_accounts.id = r.id AND rank != r.rnk;
    """)
def listen_steemd(trail_blocks=2):
    """Live-sync loop: poll steemd for each next block and process it.

    Trails the chain head by `trail_blocks` blocks to reduce fork
    exposure; raises on an unlinkable block. Runs forever.

    :param trail_blocks: how many blocks behind head to stay
    """
    steemd = get_adapter()
    curr_block = db_last_block()
    last_hash = False

    while True:
        curr_block = curr_block + 1

        # if trailing too close, take a pause
        while trail_blocks > 0:
            if curr_block <= steemd.head_block() - trail_blocks:
                break
            time.sleep(0.5)

        # get the target block; if DNE, pause and retry
        block = steemd.get_block(curr_block)
        while not block:
            time.sleep(0.5)
            block = steemd.get_block(curr_block)

        # block number is encoded in the first 4 bytes (8 hex chars) of block_id
        num = int(block['block_id'][:8], base=16)
        print("[LIVE] Got block {} at {} with {} txs -- ".format(
            num, block['timestamp'], len(block['transactions'])), end='')

        # ensure the block we received links to our last
        if last_hash and last_hash != block['previous']:
            # this condition is very rare unless trail_blocks is 0 and fork is
            # encountered; to handle gracefully, implement a pop_block method
            raise Exception("Unlinkable block: have {}, got {} -> {})".format(
                last_hash, block['previous'], block['block_id']))
        last_hash = block['block_id']

        start_time = time.perf_counter()
        # one transaction per block: raw block + cache updates commit together
        query("START TRANSACTION")
        dirty = process_block(block)
        update_posts_batch(Posts.urls_to_tuples(dirty), steemd, block['timestamp'])
        paidout = select_paidout_posts(block['timestamp'])
        update_posts_batch(paidout, steemd, block['timestamp'])
        Accounts.cache_dirty()
        Accounts.cache_dirty_follows()
        print("{} edits, {} payouts".format(len(dirty), len(paidout)))
        query("COMMIT")
        secs = time.perf_counter() - start_time
        if secs > 1:
            print("WARNING: block {} process took {}s".format(num, secs))

        # approx once per hour, update accounts
        # (1200 blocks ~= 1 hour assuming a 3s block interval — TODO confirm)
        if num % 1200 == 0:
            print("Performing account maintenance...")
            Accounts.cache_old()
            Accounts.update_ranks()
def update_follows(cls, accounts):
    """Recount follower/following totals for the given accounts (by id)."""
    account_ids = tuple(cls.get_id(name) for name in accounts)
    # only active follows (state = 1) are counted
    query("""
        UPDATE hive_accounts
           SET followers = (SELECT COUNT(*) FROM hive_follows
                             WHERE state = 1 AND following = hive_accounts.id),
               following = (SELECT COUNT(*) FROM hive_follows
                             WHERE state = 1 AND follower = hive_accounts.id)
         WHERE id IN :ids
    """, ids=account_ids)
def update_ranks(cls):
    """Rebuild `hive_accounts` table rank-by-vote-weight column."""
    # window function ranks all accounts; only changed rows are rewritten
    query("""
        UPDATE hive_accounts
           SET rank = r.rnk
          FROM (SELECT id, ROW_NUMBER() OVER (ORDER BY vote_weight DESC) as rnk
                  FROM hive_accounts) r
         WHERE hive_accounts.id = r.id AND rank != r.rnk;
    """)
def flush_recount(cls):
    """Recount follower/following totals for all accounts with queued deltas."""
    # union of every account id touched on either side of a follow delta
    touched = cls._delta[FOLLOWERS].keys() | cls._delta[FOLLOWING].keys()
    query("""
        UPDATE hive_accounts
           SET followers = (SELECT COUNT(*) FROM hive_follows
                             WHERE state = 1 AND following = hive_accounts.id),
               following = (SELECT COUNT(*) FROM hive_follows
                             WHERE state = 1 AND follower = hive_accounts.id)
         WHERE id IN :ids
    """, ids=tuple(touched))
def undelete(cls, op, date, pid):
    """Re-activate a previously deleted post id, refreshing its fields."""
    post = cls._build_post(op, date, pid)
    query("""UPDATE hive_posts
                SET is_valid = :is_valid, is_deleted = '0',
                    parent_id = :parent_id, category = :category,
                    community = :community, depth = :depth
              WHERE id = :id""", **post)
    if DbState.is_initial_sync():
        return  # caches rebuilt wholesale after initial sync
    CachedPost.undelete(pid, post['author'], post['permlink'])
    cls._insert_feed_cache(post)
def _update_chain_state(cls, adapter):
    """Refresh the hive_state row from dynamic global properties.

    Returns the chain's current head block number.
    """
    state = adapter.gdgp_extended()
    dgpo = state['dgpo']
    query("""UPDATE hive_state SET block_num = :block_num,
             steem_per_mvest = :spm, usd_per_steem = :ups,
             sbd_per_steem = :sps, dgpo = :dgpo""",
          block_num=dgpo['head_block_number'],
          spm=state['steem_per_mvest'],
          ups=state['usd_per_steem'],
          sps=state['sbd_per_steem'],
          dgpo=json.dumps(dgpo))  # full props blob stored as JSON text
    return dgpo['head_block_number']
def process_multi(cls, blocks, is_initial_sync=False):
    """Process a batch of blocks inside a single transaction."""
    query("START TRANSACTION")

    for block in blocks:
        cls._process(block, is_initial_sync)

    # Follows flushing needs to be atomic because recounts are
    # expensive. So is tracking follows at all; hence we track
    # deltas in memory and update follow/er counts in bulk.
    Follow.flush(trx=False)

    query("COMMIT")
def delete(cls, post_id, account_id=None):
    """Remove a post from the feed cache.

    With `account_id`, removes a single entry (e.g. a singular
    un-reblog). Without it, removes every instance of the post (e.g. the
    post was deleted; its entry and all reblogs must go).
    """
    assert not DbState.is_initial_sync(), 'writing to feed cache in sync'
    sql = "DELETE FROM hive_feed_cache WHERE post_id = :id"
    if account_id:
        sql += " AND account_id = :account_id"
    # account_id bind is passed unconditionally; unused when None
    query(sql, account_id=account_id, id=post_id)
def _push(cls, block):
    """Insert a row in `hive_blocks`; return the block number."""
    transactions = block['transactions']
    # block number is encoded in the first 4 bytes (8 hex chars) of block_id
    block_num = int(block['block_id'][:8], base=16)
    query("INSERT INTO hive_blocks (num, hash, prev, txs, ops, created_at) "
          "VALUES (:num, :hash, :prev, :txs, :ops, :date)",
          num=block_num,
          hash=block['block_id'],
          prev=block['previous'],
          txs=len(transactions),
          ops=sum(len(tx['operations']) for tx in transactions),
          date=block['timestamp'])
    return block_num
def register(cls, names, block_date):
    """Insert any unseen account names and cache their new ids."""
    new_names = [name for name in set(names) if not cls.exists(name)]
    if not new_names:
        return

    # insert new names and add the new ids to our mem map
    for name in new_names:
        query("INSERT INTO hive_accounts (name, created_at) "
              "VALUES (:name, :date)", name=name, date=block_date)

    sql = "SELECT name, id FROM hive_accounts WHERE name IN :names"
    cls._ids = {**dict(query_all(sql, names=tuple(new_names))), **cls._ids}
def get_community(community_name):
    """Fetch a community row by name; None if it does not exist.

    Fix: the original interpolated `community_name` directly into the SQL
    string with `%`, a SQL injection vector; the lookup is now
    parameterized.
    """
    return first(query(
        "SELECT * FROM hive_communities WHERE name = :name LIMIT 1",
        name=community_name))
def flush_recount(cls):
    """Recounts follows/following counts for all queued accounts.

    This is currently not used; this approach was shown to be too
    expensive, but it's useful in case follow counts manage to get out
    of sync.
    """
    # every account id touched on either side of a pending delta
    queued = cls._delta[FOLLOWERS].keys() | cls._delta[FOLLOWING].keys()
    query("""
        UPDATE hive_accounts
           SET followers = (SELECT COUNT(*) FROM hive_follows
                             WHERE state = 1 AND following = hive_accounts.id),
               following = (SELECT COUNT(*) FROM hive_follows
                             WHERE state = 1 AND follower = hive_accounts.id)
         WHERE id IN :ids
    """, ids=tuple(queued))
def select_paidout_posts(block_date):
    """Return (post_id, author, permlink) rows for posts due payout."""
    # NOTE(review): the IN-subquery over the same table looks equivalent to a
    # direct `WHERE is_paidout = 0 AND payout_at <= :date` — kept as-is to
    # preserve behavior; verify before simplifying.
    sql = """
        SELECT post_id, author, permlink FROM hive_posts_cache
         WHERE post_id IN (SELECT post_id FROM hive_posts_cache
                            WHERE is_paidout = 0 AND payout_at <= :date)
    """
    rows = query(sql, date=block_date)
    return list(rows)
def get_accounts_follow_stats(accounts):
    """Return follower/following counts for each account.

    Fix: the original counted every hive_follows row regardless of state,
    while every recount query in this file (`update_follows`,
    `flush_recount`) counts only active follows (`state = 1`); the
    aggregates here now apply the same filter for consistency.

    :param accounts: tuple/list of account names
    :return: {'followers': {name: n}, 'following': {name: n}}
    """
    sql = """SELECT follower, COUNT(*) FROM hive_follows
              WHERE state = 1 AND follower IN :lst GROUP BY follower"""
    following = dict(query(sql, lst=accounts).fetchall())

    sql = """SELECT following, COUNT(*) FROM hive_follows
              WHERE state = 1 AND following IN :lst GROUP BY following"""
    followers = dict(query(sql, lst=accounts).fetchall())

    # accounts with no rows get an explicit zero
    for name in accounts:
        following.setdefault(name, 0)
        followers.setdefault(name, 0)

    return {'followers': followers, 'following': following}
def get_post_id_and_depth(author, permlink):
    """Look up a post's (id, depth); (None, -1) when author is empty or
    the post does not exist.

    Fix: the original interpolated author/permlink into the SQL with `%`,
    a SQL injection vector; the lookup is now parameterized.
    """
    res = None
    if author:
        res = first(query(
            "SELECT id, depth FROM hive_posts "
            "WHERE author = :author AND permlink = :permlink",
            author=author, permlink=permlink))
    return res or (None, -1)
def process_multi(cls, blocks, is_initial_sync=False):
    """Batch-process blocks; wrapped in a transaction.

    Fix: re-raise with bare `raise` instead of `raise e` so the original
    traceback is propagated unmodified (idiomatic Python 3 re-raise).
    """
    query("START TRANSACTION")
    last_num = 0
    try:
        for block in blocks:
            last_num = cls._process(block, is_initial_sync)
    except Exception:
        # last_num is the last block processed successfully, so the
        # failing block is (presumably) the next one in sequence
        print("[FATAL] could not process block %d" % (last_num + 1))
        raise

    # Follows flushing needs to be atomic because recounts are
    # expensive. So is tracking follows at all; hence we track
    # deltas in memory and update follow/er counts in bulk.
    Follow.flush(trx=False)
    query("COMMIT")
async def get_following(account: str, skip: int, limit: int):
    """Return accounts followed by `account`, newest follows first.

    :param account: follower account name
    :param skip: pagination offset
    :param limit: max rows to return
    """
    account_id = _get_account_id(account)
    sql = """
        SELECT name FROM hive_follows hf
          JOIN hive_accounts ON hf.following = id
         WHERE hf.follower = :account_id AND state = 1
      ORDER BY hf.created_at DESC
         LIMIT :limit OFFSET :skip
    """
    res = query(sql, account_id=account_id, skip=int(skip), limit=int(limit))
    # NOTE(review): the SELECT yields a single column (name), yet r[1] is
    # accessed below — this looks like it would raise IndexError at runtime.
    # The SELECT likely needs a second column (hf.created_at?); verify intent.
    return [[r[0], str(r[1])] for r in res.fetchall()]
def listen(cls):
    """Live (block following) mode."""
    trail_blocks = Conf.get('trail_blocks')
    assert trail_blocks >= 0
    assert trail_blocks <= 100

    # debug: no max gap if disable_sync in effect
    max_gap = None if Conf.get('disable_sync') else 100

    steemd = SteemClient.instance()
    hive_head = Blocks.head_num()
    for block in steemd.stream_blocks(hive_head + 1, trail_blocks, max_gap):
        start_time = perf()

        # per-block transaction: block rows + all derived-state flushes
        # commit together
        query("START TRANSACTION")
        num = Blocks.process(block)
        follows = Follow.flush(trx=False)
        accts = Accounts.flush(trx=False, spread=8)
        CachedPost.dirty_paidouts(block['timestamp'])
        cnt = CachedPost.flush(trx=False)
        query("COMMIT")

        ms = (perf() - start_time) * 1000
        log.info("[LIVE] Got block %d at %s --% 4d txs,% 3d posts,% 3d edits,"
                 "% 3d payouts,% 3d votes,% 3d accts,% 3d follows --% 5dms%s",
                 num, block['timestamp'], len(block['transactions']),
                 cnt['insert'], cnt['update'], cnt['payout'], cnt['upvote'],
                 accts, follows, int(ms), ' SLOW' if ms > 1000 else '')

        # once per hour, update accounts
        # (1200 blocks ~= 1 hour assuming a 3s block interval — TODO confirm)
        if num % 1200 == 0:
            Accounts.dirty_oldest(10000)
            Accounts.flush(trx=True)
            #Accounts.update_ranks()

        # once a minute, update chain props
        if num % 20 == 0:
            cls._update_chain_state(steemd)