def _generate_cache_sqls(cls, accounts, block_date=None):
    """Build parameterized UPDATE statements refreshing cached account rows.

    For each named account, fetches fresh chain state from the adapter and
    produces a ``hive_accounts`` UPDATE keyed on ``name``.

    :param accounts: iterable of account names to refresh
    :param block_date: timestamp to record as `cached_at`; defaults to the
        adapter's current head time when not supplied
    :return: list of [(sql, values)] entries, one per account
    """
    if not block_date:
        block_date = get_adapter().head_time()
    sqls = []
    for account in get_adapter().get_accounts(accounts):
        values = {
            'name': account['name'],
            'proxy': account['proxy'],
            'post_count': account['post_count'],
            'reputation': rep_log10(account['reputation']),
            'proxy_weight': amount(account['vesting_shares']),
            # effective vote weight = own vests + received - delegated
            'vote_weight': amount(account['vesting_shares'])
                           + amount(account['received_vesting_shares'])
                           - amount(account['delegated_vesting_shares']),
            # lifetime_bandwidth appears to be in bytes*1e6; converted to KB.
            # NOTE(review): unit conversion unverified from here — confirm.
            'kb_used': int(account['lifetime_bandwidth']) / 1e6 / 1024,
            'active_at': account['last_bandwidth_update'],
            'cached_at': block_date,
            # merge sanitized profile metadata fields into the row
            **cls._safe_account_metadata(account)
        }
        # build "k = :k" pairs for all columns except `name` (the WHERE key)
        update = ', '.join([k + " = :" + k for k in values.keys()][1:])
        sql = "UPDATE hive_accounts SET %s WHERE name = :name" % (update)
        # NOTE(review): each entry is wrapped in a single-element list here,
        # unlike the sibling variant which appends bare tuples — confirm
        # which shape the executor expects.
        sqls.append([(sql, values)])
    return sqls
def verify_head(cls):
    """Check our head block against steemd; pop any forked-off blocks."""
    hive_head = cls.head_num()
    if not hive_head:
        return

    chain = get_adapter()
    forked = []        # mismatched blocks, newest first
    probe = hive_head

    # walk backwards from head until both chains agree on a block hash
    while True:
        assert hive_head - probe < 25, "fork too deep"
        local = cls.get(probe)
        remote_hash = chain.get_block(probe)['block_id']
        match = local['hash'] == remote_hash
        print("[INIT] fork check. block %d: %s vs %s --- %s"
              % (local['num'], local['hash'], remote_hash,
                 'ok' if match else 'invalid'))
        if match:
            break
        forked.append(local)
        probe -= 1

    if probe == hive_head:
        return  # no fork!

    print("[FORK] depth is %d; popping blocks %d - %d"
          % (hive_head - probe, probe + 1, hive_head))

    # we should not attempt to recover from fork until it's safe
    fork_limit = get_adapter().last_irreversible()
    assert probe < fork_limit, "not proceeding until head is irreversible"

    cls._pop(forked)
def listen_steemd(trail_blocks=2):
    """Follow the chain head, processing one block per iteration forever.

    :param trail_blocks: stay this many blocks behind head to reduce the
        chance of processing a block that gets forked out; 0 disables
        trailing entirely.
    """
    steemd = get_adapter()
    curr_block = db_last_block()
    last_hash = False  # falsy until the first block's hash is recorded
    while True:
        curr_block = curr_block + 1

        # if trailing too close, take a pause
        while trail_blocks > 0:
            if curr_block <= steemd.head_block() - trail_blocks:
                break
            time.sleep(0.5)

        # get the target block; if DNE, pause and retry
        block = steemd.get_block(curr_block)
        while not block:
            time.sleep(0.5)
            block = steemd.get_block(curr_block)

        # block number is encoded in the first 4 bytes (8 hex chars) of the id
        num = int(block['block_id'][:8], base=16)
        print("[LIVE] Got block {} at {} with {} txs -- ".format(
            num, block['timestamp'], len(block['transactions'])), end='')

        # ensure the block we received links to our last
        if last_hash and last_hash != block['previous']:
            # this condition is very rare unless trail_blocks is 0 and fork is
            # encountered; to handle gracefully, implement a pop_block method
            raise Exception("Unlinkable block: have {}, got {} -> {})".format(
                last_hash, block['previous'], block['block_id']))
        last_hash = block['block_id']

        # process the block and all derived cache updates in one transaction
        start_time = time.perf_counter()
        query("START TRANSACTION")
        dirty = process_block(block)
        update_posts_batch(Posts.urls_to_tuples(dirty), steemd,
                           block['timestamp'])
        paidout = select_paidout_posts(block['timestamp'])
        update_posts_batch(paidout, steemd, block['timestamp'])
        Accounts.cache_dirty()
        Accounts.cache_dirty_follows()
        print("{} edits, {} payouts".format(len(dirty), len(paidout)))
        query("COMMIT")
        secs = time.perf_counter() - start_time

        # blocks arrive every ~3s; taking >1s to process is worth flagging
        if secs > 1:
            print("WARNING: block {} process took {}s".format(num, secs))

        # approx once per hour, update accounts
        if num % 1200 == 0:
            print("Performing account maintenance...")
            Accounts.cache_old()
            Accounts.update_ranks()
def update_chain_state():
    """Refresh the hive_state row from current global chain properties.

    :return: the chain's head block number
    """
    props = get_adapter().gdgp_extended()
    dgpo = props['dgpo']
    head_num = dgpo['head_block_number']
    sql = """UPDATE hive_state SET block_num = :block_num, steem_per_mvest = :spm, usd_per_steem = :ups, sbd_per_steem = :sps, dgpo = :dgpo"""
    query(sql,
          block_num=head_num,
          spm=props['steem_per_mvest'],
          ups=props['usd_per_steem'],
          sps=props['sbd_per_steem'],
          dgpo=json.dumps(dgpo))
    return head_num
def sync_from_steemd():
    """Batch-sync blocks from our head up to steemd's last irreversible block.

    Fetches and processes blocks in chunks of 1000, then flushes the
    in-memory follow/account/post caches. A KeyboardInterrupt triggers a
    partial cleanup and exits the process.
    """
    is_initial_sync = DbState.is_initial_sync()
    steemd = get_adapter()

    lbound = Blocks.head_num() + 1
    ubound = steemd.last_irreversible()
    if ubound <= lbound:
        return  # already caught up

    _abort = False
    try:
        print("[SYNC] start block %d, +%d to sync" % (lbound, ubound-lbound+1))
        timer = Timer(ubound - lbound, entity='block', laps=['rps', 'wps'])
        while lbound < ubound:
            # fetch in chunks of at most 1000 blocks
            to = min(lbound + 1000, ubound)
            timer.batch_start()
            blocks = steemd.get_blocks_range(lbound, to)
            timer.batch_lap()
            Blocks.process_multi(blocks, is_initial_sync)
            timer.batch_finish(len(blocks))
            date = blocks[-1]['timestamp']
            print(timer.batch_status("[SYNC] Got block %d @ %s" % (to-1, date)))
            lbound = to
    except KeyboardInterrupt:
        traceback.print_exc()
        print("\n\n[SYNC] Aborted.. cleaning up..")
        _abort = True

    if not is_initial_sync:
        # Follows flushing may need to be moved closer to core (i.e. moved
        # into main block transactions). Important to keep in sync since
        # we need to prevent expensive recounts. This will fail if we aborted
        # in the middle of a transaction, meaning data loss. Better than
        # forcing it, however, since in-memory cache will be out of sync
        # with db state.
        Follow.flush(trx=True)

        # This flush is low importance; accounts are swept regularly.
        if not _abort:
            Accounts.flush(trx=True)

        # If this flush fails, all that could potentially be lost here is
        # edits and pre-payout votes. If the post has not been paid out yet,
        # then the worst case is it will be synced upon payout. If the post
        # is already paid out, worst case is to lose an edit.
        CachedPost.flush(trx=True)

    if _abort:
        print("[SYNC] Aborted")
        exit()
def cache_missing_posts():
    """Backfill hive_posts_cache entries for posts not yet cached."""
    # cached posts inserted sequentially, so just compare MAX(id)'s
    gap_sql = ("SELECT (SELECT IFNULL(MAX(id), 0) FROM hive_posts) - "
               "(SELECT IFNULL(MAX(post_id), 0) FROM hive_posts_cache)")
    gap = query_one(gap_sql)
    print("[INIT] Found {} missing post cache entries".format(gap))
    if not gap:
        return

    # process in batches of 1m posts
    while True:
        batch = select_missing_posts(1e6)
        if not batch:
            break
        update_posts_batch(batch, get_adapter())
def _generate_cache_sqls(cls, accounts):
    """Build parameterized UPDATE statements refreshing cached account rows.

    Fetches fresh state for each named account, strips bulky history keys,
    extracts sanitized profile metadata, and emits one ``hive_accounts``
    UPDATE (keyed on ``name``) per account.

    :param accounts: iterable of account names to refresh
    :return: list of (sql, values) tuples, one per account
    """
    cached_at = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
    sqls = []
    for account in get_adapter().get_accounts(accounts):
        # effective vote weight = own vests + received - delegated
        vote_weight = (amount(account['vesting_shares'])
                       + amount(account['received_vesting_shares'])
                       - amount(account['delegated_vesting_shares']))

        # remove empty keys
        # NOTE: mutates the account dict in place before raw_json is dumped,
        # so these keys never reach the stored raw_json blob.
        useless = ['transfer_history', 'market_history', 'post_history',
                   'vote_history', 'other_history', 'tags_usage',
                   'guest_bloggers']
        for key in useless:
            del account[key]

        # pull out valid profile md and delete the key
        profile = cls._safe_account_metadata(account)
        del account['json_metadata']

        values = {
            'name': account['name'],
            'proxy': account['proxy'],
            'post_count': account['post_count'],
            'reputation': rep_log10(account['reputation']),
            'proxy_weight': amount(account['vesting_shares']),
            'vote_weight': vote_weight,
            # lifetime_bandwidth converted to KB.
            # NOTE(review): unit conversion unverified from here — confirm.
            'kb_used': int(account['lifetime_bandwidth']) / 1e6 / 1024,
            'active_at': account['last_bandwidth_update'],
            'cached_at': cached_at,
            'display_name': profile['name'],
            'about': profile['about'],
            'location': profile['location'],
            'website': profile['website'],
            'profile_image': profile['profile_image'],
            'cover_image': profile['cover_image'],
            'raw_json': json.dumps(account)
        }

        # build "k = :k" pairs for all columns except `name` (the WHERE key)
        update = ', '.join([k + " = :" + k for k in list(values.keys())][1:])
        sql = "UPDATE hive_accounts SET %s WHERE name = :name" % (update)
        sqls.append((sql, values))
    return sqls
def sync_from_steemd(is_initial_sync):
    """Batch-sync blocks from our head to steemd's last irreversible block.

    Processes blocks in chunks of 1000, tracking dirty post urls, then (when
    past initial sync) refreshes edited posts and pending payouts.

    :param is_initial_sync: True while performing the first full sync; when
        set, post-cache updates at the end are skipped.
    """
    steemd = get_adapter()
    dirty = set()  # urls of posts touched during this sync

    lbound = db_last_block() + 1
    ubound = steemd.last_irreversible_block_num()
    print("[SYNC] {} blocks to batch sync".format(ubound - lbound + 1))
    print("[SYNC] start sync from block %d" % lbound)

    while lbound < ubound:
        # fetch in chunks of at most 1000 blocks
        to = min(lbound + 1000, ubound)
        lap_0 = time.perf_counter()
        blocks = steemd.get_blocks_range(lbound, to)
        lap_1 = time.perf_counter()
        dirty |= process_blocks(blocks, is_initial_sync)
        lap_2 = time.perf_counter()

        # throughput: overall rate, read blocks/s, write (process) blocks/s
        # NOTE(review): divides by lap deltas — would ZeroDivisionError if a
        # lap measured 0s; presumably never observed in practice.
        rate = (to - lbound) / (lap_2 - lap_0)
        rps = int((to - lbound) / (lap_1 - lap_0))
        wps = int((to - lbound) / (lap_2 - lap_1))
        print("[SYNC] Got block {} ({}/s, {}rps {}wps) -- {}m remaining".format(
            to - 1, round(rate, 1), rps, wps,
            round((ubound - to) / rate / 60, 2)))

        lbound = to

    # batch update post cache after catching up to head block
    if not is_initial_sync:
        print("[PREP] Update {} edited posts".format(len(dirty)))
        update_posts_batch(Posts.urls_to_tuples(dirty), steemd)
        date = steemd.head_time()
        paidout = select_paidout_posts(date)
        print("[PREP] Process {} payouts since {}".format(len(paidout), date))
        update_posts_batch(paidout, steemd, date)
async def call(api, method, params):
    """Dispatch a condenser-style JSONRPC call to the matching hive handler.

    Follow and discussion queries are served locally; a handful of methods at
    the bottom are passed straight through to the steemd client (marked as
    TESTING ONLY in the original).

    :param api: api namespace of the incoming call (used only in the error)
    :param method: method name to dispatch on
    :param params: positional params; shape varies by method (some expect a
        single dict in params[0], others plain positional args)
    :raises Exception: when the method name is not recognized
    """
    if method == 'get_followers':
        return await get_followers(params[0], params[1], params[2], params[3])
    elif method == 'get_following':
        return await get_following(params[0], params[1], params[2], params[3])
    elif method == 'get_follow_count':
        return await get_follow_count(params[0])
    elif method == 'get_discussions_by_trending':
        return await get_discussions_by_trending(params[0]['start_author'],
                                                 params[0]['start_permlink'],
                                                 params[0]['limit'],
                                                 params[0]['tag'])
    elif method == 'get_discussions_by_hot':
        return await get_discussions_by_hot(params[0]['start_author'],
                                            params[0]['start_permlink'],
                                            params[0]['limit'],
                                            params[0]['tag'])
    elif method == 'get_discussions_by_promoted':
        return await get_discussions_by_promoted(params[0]['start_author'],
                                                 params[0]['start_permlink'],
                                                 params[0]['limit'],
                                                 params[0]['tag'])
    elif method == 'get_discussions_by_created':
        return await get_discussions_by_created(params[0]['start_author'],
                                                params[0]['start_permlink'],
                                                params[0]['limit'],
                                                params[0]['tag'])
    elif method == 'get_discussions_by_blog':
        # note: blog/feed take tag first, unlike the sort-based queries above
        return await get_discussions_by_blog(params[0]['tag'],
                                             params[0]['start_author'],
                                             params[0]['start_permlink'],
                                             params[0]['limit'])
    elif method == 'get_discussions_by_feed':
        return await get_discussions_by_feed(params[0]['tag'],
                                             params[0]['start_author'],
                                             params[0]['start_permlink'],
                                             params[0]['limit'])
    elif method == 'get_discussions_by_comments':
        return await get_discussions_by_comments(params[0]['start_author'],
                                                 params[0]['start_permlink'],
                                                 params[0]['limit'])
    elif method == 'get_replies_by_last_update':
        return await get_replies_by_last_update(params[0], params[1], params[2])
    elif method == 'get_content':
        return await get_content(params[0], params[1])  # after submit vote/post
    elif method == 'get_content_replies':
        return await get_content_replies(params[0], params[1])
    elif method == 'get_state':
        return await get_state(params[0])

    # passthrough -- TESTING ONLY!
    if method == 'get_dynamic_global_properties':
        # condenser only uses total_vesting_fund_steem, total_vesting_shares,
        # sbd_interest_rate
        return get_adapter()._gdgp()
    elif method == 'get_accounts':
        return get_adapter().get_accounts(params[0])
    elif method == 'get_open_orders':
        return get_adapter()._client.exec('get_open_orders', params[0])
    elif method == 'get_block':
        return get_adapter()._client.exec('get_block', params[0])
    elif method == 'broadcast_transaction_synchronous':
        return get_adapter()._client.exec('broadcast_transaction_synchronous',
                                          params[0], api='network_broadcast_api')
    elif method == 'get_savings_withdraw_to':
        return get_adapter()._client.exec('get_savings_withdraw_to', params[0])
    elif method == 'get_savings_withdraw_from':
        return get_adapter()._client.exec('get_savings_withdraw_from', params[0])

    raise Exception("unknown method: {}.{}({})".format(api, method, params))
async def get_state(path: str):
    """Build the steemit.com-style `state` object for a given URL path.

    Supports account tabs (feed/blog/comments/replies), discussion threads,
    the trending/promoted/hot/created listings, and the tags explorer.

    :param path: URL path, e.g. "trending/photography" or "@user/blog"
    :return: dict with content, accounts, discussion/tag indexes, props, etc.
    :raises Exception: on malformed, unsupported, or unrecognized paths
    """
    if path[0] == '/':
        path = path[1:]
    if not path:
        path = 'trending'
    part = path.split('/')
    if len(part) > 3:
        raise Exception("invalid path %s" % path)
    while len(part) < 3:
        part.append('')  # pad so part[0..2] are always addressable

    state = {}
    state['current_route'] = path
    state['props'] = _get_props_lite()
    state['tags'] = {}
    state['tag_idx'] = {}
    state['tag_idx']['trending'] = []
    state['content'] = {}
    state['accounts'] = {}
    state['discussion_idx'] = {"": {}}
    state['feed_price'] = _get_feed_price()
    # snapshot of the empty state, used below to detect a no-op path
    state1 = "{}".format(state)

    # account tabs (feed, blog, comments, replies)
    if part[0] and part[0][0] == '@':
        if not part[1]:
            part[1] = 'blog'
        if part[1] == 'transfers':
            raise Exception("transfers API not served by hive")
        if part[2]:
            raise Exception("unexpected account path part[2] %s" % path)

        account = part[0][1:]
        # map url tab name -> state key
        keys = {'recent-replies': 'recent_replies',
                'comments': 'comments',
                'blog': 'blog',
                'feed': 'feed'}
        if part[1] not in keys:
            raise Exception("invalid account path %s" % path)
        key = keys[part[1]]

        # TODO: use _load_accounts([account])? Examine issue w/ login
        account_obj = get_adapter().get_accounts([account])[0]
        state['accounts'][account] = account_obj

        if key == 'recent_replies':
            posts = await get_replies_by_last_update(account, "", 20)
        elif key == 'comments':
            posts = await get_discussions_by_comments(account, "", 20)
        elif key == 'blog':
            posts = await get_discussions_by_blog(account, "", "", 20)
        elif key == 'feed':
            posts = await get_discussions_by_feed(account, "", "", 20)

        state['accounts'][account][key] = []
        for post in posts:
            ref = post['author'] + '/' + post['permlink']
            state['accounts'][account][key].append(ref)
            state['content'][ref] = post

    # discussion thread
    elif part[1] and part[1][0] == '@':
        author = part[1][1:]
        permlink = part[2]
        state['content'] = _load_discussion_recursive(author, permlink)
        accounts = set(map(lambda p: p['author'], state['content'].values()))
        state['accounts'] = {a['name']: a for a in _load_accounts(accounts)}

    # trending/etc pages
    elif part[0] in ['trending', 'promoted', 'hot', 'created']:
        if part[2]:
            raise Exception("unexpected discussion path part[2] %s" % path)
        sort = part[0]
        tag = part[1].lower()
        posts = _get_discussions(sort, '', '', 20, tag)
        state['discussion_idx'][tag] = {sort: []}
        for post in posts:
            ref = post['author'] + '/' + post['permlink']
            state['content'][ref] = post
            state['discussion_idx'][tag][sort].append(ref)
        state['tag_idx']['trending'] = await _get_top_trending_tags()

    # witness list
    elif part[0] == 'witnesses':
        raise Exception("not implemented")

    # tag "explorer"
    elif part[0] == "tags":
        state['tag_idx']['trending'] = []
        tags = await _get_trending_tags()
        for t in tags:
            state['tag_idx']['trending'].append(t['name'])
            state['tags'][t['name']] = t

    else:
        raise Exception("unknown path {}".format(path))

    # (debug; should not happen) if state did not change, complain
    state2 = "{}".format(state)
    if state1 == state2:
        # FIX: original used `"...{}..." % path`, which mixes str.format
        # placeholders with the % operator and raises TypeError instead of
        # the intended message.
        raise Exception("unrecognized path `{}`".format(path))

    return state
def head_state(*args):
    """Report steemd vs hive head block numbers and the gap between them."""
    _ = args  # JSONRPC injects 4 arguments here
    chain_head = get_adapter().head_block()
    local_head = db_last_block()
    return {'steemd': chain_head,
            'hive': local_head,
            'diff': chain_head - local_head}
def listen_steemd(trail_blocks=0, max_gap=50):
    """Follow the chain head, buffering `trail_blocks` blocks before
    processing each one inside its own transaction.

    Tracks the expected 3-second block schedule to detect missed slots,
    retries unavailable blocks, and bails out (returning to batch sync)
    when too far behind or when a fork empties the buffer.

    :param trail_blocks: number of blocks to buffer before processing
        (0 = process immediately; higher = safer against forks)
    :param max_gap: abort listen mode when this many blocks behind head
    """
    assert trail_blocks >= 0
    assert trail_blocks < 25

    # db state
    db_last = Blocks.last()
    last_block = db_last['num']
    last_hash = db_last['hash']

    # chain state
    steemd = get_adapter()
    head_block = steemd.head_block()
    next_expected = time.time()

    # loop state
    tries = 0
    queue = []

    # TODO: detect missed blocks by looking at block timestamps.
    #       this would be an even more efficient way to track slots.
    while True:
        assert not last_block > head_block

        # fast fwd head block if slots missed
        curr_time = time.time()
        while curr_time >= next_expected:
            head_block += 1
            next_expected += 3  # 3s block interval

        # if gap too large, abort. if caught up, wait.
        gap = head_block - last_block
        if gap > max_gap:
            print("[LIVE] gap too large: %d -- abort listen mode" % gap)
            return
        elif gap > 0:
            print("[LIVE] %d blocks behind..." % gap)
        elif gap == 0:
            time.sleep(next_expected - curr_time)
            head_block += 1
            next_expected += 3

        # get the target block; if DNE, pause and retry
        block_num = last_block + 1
        block = steemd.get_block(block_num)
        if not block:
            tries += 1
            print("[LIVE] block %d unavailable (try %d). delay 1s. head: %d/%d."
                  % (block_num, tries, head_block, steemd.head_block()))
            #assert tries < 12, "could not fetch block %s" % block_num
            assert tries < 240, "could not fetch block %s" % block_num  #74
            time.sleep(1)  # pause for 1s; and,
            next_expected += 1  # delay schedule 1s
            continue
        last_block = block_num
        tries = 0

        # ensure this block links to our last; otherwise, blow up. see #59
        if last_hash != block['previous']:
            if queue:
                print("[FORK] Fork encountered. Emptying queue to retry!")
                return
            raise Exception("Unlinkable block: have %s, got %s -> %s)"
                            % (last_hash, block['previous'], block['block_id']))
        last_hash = block['block_id']

        # buffer until queue full
        queue.append(block)
        if len(queue) <= trail_blocks:
            continue

        # buffer primed; process head of queue
        # ------------------------------------
        block = queue.pop(0)

        start_time = time.perf_counter()
        query("START TRANSACTION")
        num = Blocks.process(block)
        follows = Follow.flush(trx=False)
        accts = Accounts.flush(trx=False, period=8)
        CachedPost.dirty_paidouts(block['timestamp'])
        cnt = CachedPost.flush(trx=False)
        query("COMMIT")
        ms = (time.perf_counter() - start_time) * 1000
        print("[LIVE] Got block %d at %s --% 4d txs,% 3d posts,% 3d edits,"
              "% 3d payouts,% 3d votes,% 3d accounts,% 3d follows --% 5dms%s"
              % (num, block['timestamp'], len(block['transactions']),
                 cnt['insert'], cnt['update'], cnt['payout'], cnt['upvote'],
                 accts, follows, int(ms), ' SLOW' if ms > 1000 else ''))

        # once per hour, update accounts
        if num % 1200 == 0:
            Accounts.dirty_oldest(10000)
            Accounts.flush(trx=True)
            #Accounts.update_ranks()

        # once a minute, update chain props
        if num % 20 == 0:
            update_chain_state()