def _update_batch(cls, tuples, trx=True):
    """Fetch a batch of posts from steemd and write them to the cache.

    `tuples` is an iterable of `(url, id, level)` entries; they are
    processed in ascending id order, in chunks of up to 1000 posts.
    """
    client = SteemClient.instance()
    timer = Timer(total=len(tuples), entity='post', laps=['rps', 'wps'])
    tuples = sorted(tuples, key=lambda tup: tup[1])  # enforce ASC id's

    for chunk in partition_all(1000, tuples):
        timer.batch_start()
        fetched = client.get_content_batch(
            [url.split('/') for url, _, _ in chunk])
        queries = []
        for (_, pid, level), post in zip(chunk, fetched):
            if not post['author']:
                # deleted upstream; skip the write but still advance cursor
                print("WARNING: ignoring deleted post {}".format(pid))
            else:
                queries.append(cls._sql(pid, post, level=level))
            cls._bump_last_id(pid)
        timer.batch_lap()
        cls._batch_queries(queries, trx)
        timer.batch_finish(len(fetched))
        if len(tuples) >= 1000:
            print(timer.batch_status())
def verify_head(cls):
    """Perform a fork recovery check on startup.

    Walks backwards from the local head until hive's stored block hash
    matches steemd's, then pops any mismatched (forked) blocks. Asserts
    if the divergence reaches 25 blocks, or if the common ancestor is
    not yet irreversible (recovery would not be safe).
    """
    hive_head = cls.head_num()
    if not hive_head:
        # empty db -- nothing to verify yet
        return

    # move backwards from head until hive/steem agree
    to_pop = []
    cursor = hive_head
    steemd = SteemClient.instance()
    while True:
        assert hive_head - cursor < 25, "fork too deep"
        hive_block = cls._get(cursor)
        steem_hash = steemd.get_block(cursor)['block_id']
        match = hive_block['hash'] == steem_hash
        print("[INIT] fork check. block %d: %s vs %s --- %s"
              % (hive_block['num'], hive_block['hash'],
                 steem_hash, 'ok' if match else 'invalid'))
        if match:
            break
        # mismatched block: queue it for removal, step back one
        to_pop.append(hive_block)
        cursor -= 1

    if hive_head == cursor:
        return  # no fork!

    print("[FORK] depth is %d; popping blocks %d - %d"
          % (hive_head - cursor, cursor + 1, hive_head))

    # we should not attempt to recover from fork until it's safe
    fork_limit = steemd.last_irreversible()
    assert cursor < fork_limit, "not proceeding until head is irreversible"

    cls._pop(to_pop)
def listen(cls):
    """Listen for new blocks and apply each inside a DB transaction.

    Streams blocks starting just past the local head, trailing the
    network head by `trail_blocks` (must be in [0, 25)). Periodic
    maintenance (account sweep, chain-state refresh) is keyed off the
    block number.
    """
    trail_blocks = Conf.get('trail_blocks')
    assert trail_blocks >= 0
    assert trail_blocks < 25
    steemd = SteemClient.instance()
    hive_head = Blocks.head_num()
    for block in steemd.stream_blocks(hive_head + 1, trail_blocks, max_gap=40):
        start_time = time.perf_counter()

        # block + all derived state are committed as one atomic unit
        query("START TRANSACTION")
        num = Blocks.process(block)
        follows = Follow.flush(trx=False)
        accts = Accounts.flush(trx=False, period=8)
        CachedPost.dirty_paidouts(block['timestamp'])
        cnt = CachedPost.flush(trx=False)
        query("COMMIT")

        ms = (time.perf_counter() - start_time) * 1000
        print("[LIVE] Got block %d at %s --% 4d txs,% 3d posts,% 3d edits,"
              "% 3d payouts,% 3d votes,% 3d accounts,% 3d follows --% 5dms%s"
              % (num, block['timestamp'], len(block['transactions']),
                 cnt['insert'], cnt['update'], cnt['payout'], cnt['upvote'],
                 accts, follows, int(ms), ' SLOW' if ms > 1000 else ''))

        # once per hour, update accounts
        if num % 1200 == 0:
            Accounts.dirty_oldest(10000)
            Accounts.flush(trx=True)
            #Accounts.update_ranks()

        # once a minute, update chain props
        if num % 20 == 0:
            cls._update_chain_state(steemd)
def _load_account(name):
    """Fetch a single raw steemd account object by name.

    TODO: switch to hive's own `load_accounts`; requires auditing the
    condenser keys callers assume (recent_replies, comments, feed, blog).
    """
    from hive.steem.steem_client import SteemClient
    return SteemClient.instance().get_accounts([name])[0]
async def call(api, method, params):
    """Dispatch a condenser-style API call.

    Methods hive implements natively are routed to local async handlers;
    everything below the passthrough marker is forwarded directly to
    steemd (testing only). Raises for unknown methods.

    NOTE(review): `get_discussions_by_blog`/`_feed` take `tag` first,
    while the other discussion queries take it last -- verify against
    the handlers' signatures before reordering anything here.
    """
    # pylint: disable=line-too-long, protected-access, too-many-return-statements, too-many-branches
    if method == 'get_followers':
        return await get_followers(params[0], params[1], params[2], params[3])
    elif method == 'get_following':
        return await get_following(params[0], params[1], params[2], params[3])
    elif method == 'get_follow_count':
        return await get_follow_count(params[0])
    elif method == 'get_discussions_by_trending':
        return await get_discussions_by_trending(params[0]['start_author'], params[0]['start_permlink'], params[0]['limit'], params[0]['tag'])
    elif method == 'get_discussions_by_hot':
        return await get_discussions_by_hot(params[0]['start_author'], params[0]['start_permlink'], params[0]['limit'], params[0]['tag'])
    elif method == 'get_discussions_by_promoted':
        return await get_discussions_by_promoted(params[0]['start_author'], params[0]['start_permlink'], params[0]['limit'], params[0]['tag'])
    elif method == 'get_discussions_by_created':
        return await get_discussions_by_created(params[0]['start_author'], params[0]['start_permlink'], params[0]['limit'], params[0]['tag'])
    elif method == 'get_discussions_by_blog':
        return await get_discussions_by_blog(params[0]['tag'], params[0]['start_author'], params[0]['start_permlink'], params[0]['limit'])
    elif method == 'get_discussions_by_feed':
        return await get_discussions_by_feed(params[0]['tag'], params[0]['start_author'], params[0]['start_permlink'], params[0]['limit'])
    elif method == 'get_discussions_by_comments':
        return await get_discussions_by_comments(params[0]['start_author'], params[0]['start_permlink'], params[0]['limit'])
    elif method == 'get_replies_by_last_update':
        return await get_replies_by_last_update(params[0], params[1], params[2])
    elif method == 'get_content':
        return await get_content(params[0], params[1]) # after submit vote/post
    elif method == 'get_content_replies':
        return await get_content_replies(params[0], params[1])
    elif method == 'get_state':
        return await get_state(params[0])

    # passthrough -- TESTING ONLY!
    steemd = SteemClient.instance()
    if method == 'get_dynamic_global_properties':
        return steemd._gdgp() # condenser only uses total_vesting_fund_steem, total_vesting_shares, sbd_interest_rate
    elif method == 'get_accounts':
        return steemd.get_accounts(params[0])
    elif method == 'get_open_orders':
        return steemd._client.exec('get_open_orders', params[0])
    elif method == 'get_block':
        return steemd._client.exec('get_block', params[0])
    elif method == 'broadcast_transaction_synchronous':
        return steemd._client.exec('broadcast_transaction_synchronous', params[0], api='network_broadcast_api')
    elif method == 'get_savings_withdraw_to':
        return steemd._client.exec('get_savings_withdraw_to', params[0])
    elif method == 'get_savings_withdraw_from':
        return steemd._client.exec('get_savings_withdraw_from', params[0])

    raise Exception("unknown method: {}.{}({})".format(api, method, params))
def from_steemd(cls, is_initial_sync=False, chunk_size=1000):
    """Fast sync strategy: read/process blocks in batches.

    Syncs from the local head up to steemd's last irreversible block,
    fetching `chunk_size` blocks per round-trip. A KeyboardInterrupt
    aborts cleanly: pending cache flushes are still attempted, then the
    process exits.
    """
    steemd = SteemClient.instance()
    lbound = Blocks.head_num() + 1
    ubound = steemd.last_irreversible()
    count = ubound - lbound
    if count < 1:
        return  # already caught up

    _abort = False
    try:
        print("[SYNC] start block %d, +%d to sync" % (lbound, count))
        timer = Timer(count, entity='block', laps=['rps', 'wps'])
        while lbound < ubound:
            timer.batch_start()

            # fetch blocks
            to = min(lbound + chunk_size, ubound)
            blocks = steemd.get_blocks_range(lbound, to)
            lbound = to
            timer.batch_lap()

            # process blocks
            Blocks.process_multi(blocks, is_initial_sync)
            timer.batch_finish(len(blocks))
            date = blocks[-1]['timestamp']
            print(timer.batch_status("[SYNC] Got block %d @ %s" % (to-1, date)))
    except KeyboardInterrupt:
        traceback.print_exc()
        print("\n\n[SYNC] Aborted.. cleaning up..")
        _abort = True

    if not is_initial_sync:
        # This flush is low importance; accounts are swept regularly.
        if not _abort:
            Accounts.flush(trx=True)

        # If this flush fails, all that could potentially be lost here is
        # edits and pre-payout votes. If the post has not been paid out yet,
        # then the worst case is it will be synced upon payout. If the post
        # is already paid out, worst case is to lose an edit.
        CachedPost.flush(trx=True)

    if _abort:
        print("[SYNC] Aborted")
        # raise SystemExit directly; the builtin exit() helper is supplied
        # by the `site` module and is not guaranteed to exist.
        raise SystemExit
def _generate_cache_sqls(cls, accounts):
    """Prepare a SQL query from a steemd account.

    For each account name, fetches the full steemd account object,
    derives cached fields (net vote weight, reputation, profile
    metadata), and returns a list of `(sql, values)` UPDATE tuples
    for `hive_accounts`.
    """
    # presumably local server time; naive datetime -- TODO confirm tz intent
    cached_at = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')

    # bulky steemd fields we never store; loop-invariant, so build once
    useless = ('transfer_history', 'market_history', 'post_history',
               'vote_history', 'other_history', 'tags_usage',
               'guest_bloggers')

    sqls = []
    for account in SteemClient.instance().get_accounts(accounts):
        # net influence = own + received - delegated vests
        vote_weight = (vests_amount(account['vesting_shares'])
                       + vests_amount(account['received_vesting_shares'])
                       - vests_amount(account['delegated_vesting_shares']))

        # remove empty keys
        for key in useless:
            del account[key]

        # pull out valid profile md and delete the key
        profile = safe_profile_metadata(account)
        del account['json_metadata']

        values = {
            'name': account['name'],
            'proxy': account['proxy'],
            'post_count': account['post_count'],
            'reputation': rep_log10(account['reputation']),
            'proxy_weight': vests_amount(account['vesting_shares']),
            'vote_weight': vote_weight,
            'kb_used': int(account['lifetime_bandwidth']) / 1e6 / 1024,
            'active_at': account['last_bandwidth_update'],
            'cached_at': cached_at,
            'display_name': profile['name'],
            'about': profile['about'],
            'location': profile['location'],
            'website': profile['website'],
            'profile_image': profile['profile_image'],
            'cover_image': profile['cover_image'],
            'raw_json': json.dumps(account)
        }

        # first key ('name') is the WHERE target, not a SET column
        update = ', '.join([k + " = :" + k for k in list(values.keys())][1:])
        sql = "UPDATE hive_accounts SET %s WHERE name = :name" % (update)
        sqls.append((sql, values))
    return sqls
def _update_batch(cls, tuples, trx=True, full_total=None):
    """Fetch, process, and write a batch of posts.

    `tuples` is a list of `(url, id, level)` entries naming posts to
    fetch from steemd and update in the cache; they are handled in
    ascending id order, in chunks of up to 1000.

    Note on `_bump_last_id`: even when steemd reports a post as deleted
    (i.e. `not post['author']`) the cursor must still advance. There is
    a rare edge case where the latest hive_post entry was deleted "in
    the future" (delete op not yet seen); since this cursor is used to
    deduce missing cache entries, it cannot be left behind.
    """
    client = SteemClient.instance()
    timer = Timer(total=len(tuples), entity='post',
                  laps=['rps', 'wps'], full_total=full_total)
    tuples = sorted(tuples, key=lambda tup: tup[1])  # enforce ASC id's

    for chunk in partition_all(1000, tuples):
        timer.batch_start()
        fetched = client.get_content_batch(
            [url.split('/') for url, _, _ in chunk])
        queries = []
        for (_, pid, level), post in zip(chunk, fetched):
            if not post['author']:
                print("WARNING: ignoring deleted post {}".format(pid))
            else:
                queries.append(cls._sql(pid, post, level=level))
            cls._bump_last_id(pid)
        timer.batch_lap()
        cls._batch_queries(queries, trx)
        timer.batch_finish(len(fetched))
        if len(tuples) >= 1000:
            print(timer.batch_status())
async def get_state(path: str):
    """`get_state` reimplementation.

    See: https://github.com/steemit/steem/blob/06e67bd4aea73391123eca99e1a22a8612b0c47e/libraries/app/database_api.cpp#L1937

    Parses `path` into up to three segments and populates a condenser
    state dict (accounts, content, discussion_idx, tags) for account
    tabs, discussion threads, sort pages, and the tag explorer. Raises
    for invalid/unsupported paths.
    """
    # pylint: disable=too-many-locals, too-many-branches, too-many-statements
    # guard against empty path before indexing (fix: path[0] raised IndexError on "")
    if path and path[0] == '/':
        path = path[1:]
    if not path:
        path = 'trending'
    part = path.split('/')
    if len(part) > 3:
        raise Exception("invalid path %s" % path)
    while len(part) < 3:
        part.append('')

    state = {}
    state['current_route'] = path
    state['props'] = _get_props_lite()
    state['tags'] = {}
    state['tag_idx'] = {}
    state['tag_idx']['trending'] = []
    state['content'] = {}
    state['accounts'] = {}
    state['discussion_idx'] = {"": {}}
    state['feed_price'] = _get_feed_price()
    # snapshot to detect no-op paths at the end (debug aid)
    state1 = "{}".format(state)

    # account tabs (feed, blog, comments, replies)
    if part[0] and part[0][0] == '@':
        if not part[1]:
            part[1] = 'blog'
        if part[1] == 'transfers':
            raise Exception("transfers API not served by hive")
        if part[2]:
            raise Exception("unexpected account path part[2] %s" % path)

        account = part[0][1:]
        keys = {'recent-replies': 'recent_replies',
                'comments': 'comments',
                'blog': 'blog',
                'feed': 'feed'}
        if part[1] not in keys:
            raise Exception("invalid account path %s" % path)
        key = keys[part[1]]

        # TODO: use _load_accounts([account])? Examine issue w/ login
        account_obj = SteemClient.instance().get_accounts([account])[0]
        state['accounts'][account] = account_obj

        if key == 'recent_replies':
            posts = await get_replies_by_last_update(account, "", 20)
        elif key == 'comments':
            posts = await get_discussions_by_comments(account, "", 20)
        elif key == 'blog':
            posts = await get_discussions_by_blog(account, "", "", 20)
        elif key == 'feed':
            posts = await get_discussions_by_feed(account, "", "", 20)

        state['accounts'][account][key] = []
        for post in posts:
            ref = post['author'] + '/' + post['permlink']
            state['accounts'][account][key].append(ref)
            state['content'][ref] = post

    # discussion thread
    elif part[1] and part[1][0] == '@':
        author = part[1][1:]
        permlink = part[2]
        state['content'] = _load_discussion_recursive(author, permlink)
        accounts = set(map(lambda p: p['author'], state['content'].values()))
        state['accounts'] = {a['name']: a for a in _load_accounts(accounts)}

    # trending/etc pages
    elif part[0] in ['trending', 'promoted', 'hot', 'created']:
        if part[2]:
            raise Exception("unexpected discussion path part[2] %s" % path)
        sort = part[0]
        tag = part[1].lower()
        posts = cursor.pids_by_query(sort, '', '', 20, tag)
        state['discussion_idx'][tag] = {sort: []}
        for post in posts:
            ref = post['author'] + '/' + post['permlink']
            state['content'][ref] = post
            state['discussion_idx'][tag][sort].append(ref)
        state['tag_idx']['trending'] = await _get_top_trending_tags()

    # witness list
    elif part[0] == 'witnesses':
        raise Exception("not implemented")

    # tag "explorer"
    elif part[0] == "tags":
        state['tag_idx']['trending'] = []
        tags = await _get_trending_tags()
        for tag in tags:
            state['tag_idx']['trending'].append(tag['name'])
            state['tags'][tag['name']] = tag

    else:
        raise Exception("unknown path {}".format(path))

    # (debug; should not happen) if state did not change, complain
    state2 = "{}".format(state)
    if state1 == state2:
        # fix: was `"...{}" % path`, which raised TypeError instead of
        # the intended message ({} placeholder requires .format())
        raise Exception("unrecognized path `{}`".format(path))

    return state