def _process(cls, block, is_initial_sync=False): """Process a single block. Assumes a trx is open.""" #pylint: disable=too-many-branches num = cls._push(block) date = block['timestamp'] account_names = set() json_ops = [] for tx_idx, tx in enumerate(block['transactions']): for operation in tx['operations']: op_type = operation['type'] op = operation['value'] # account ops if op_type == 'pow_operation': account_names.add(op['worker_account']) elif op_type == 'pow2_operation': account_names.add( op['work']['value']['input']['worker_account']) elif op_type == 'account_create_operation': account_names.add(op['new_account_name']) elif op_type == 'account_create_with_delegation_operation': account_names.add(op['new_account_name']) elif op_type == 'create_claimed_account_operation': account_names.add(op['new_account_name']) # account metadata updates elif op_type == 'account_update_operation': if not is_initial_sync: Accounts.dirty(op['account']) # full elif op_type == 'account_update2_operation': if not is_initial_sync: Accounts.dirty(op['account']) # full # post ops elif op_type == 'comment_operation': Posts.comment_op(op, date) if not is_initial_sync: Accounts.dirty(op['author']) # lite - stats elif op_type == 'delete_comment_operation': Posts.delete_op(op) elif op_type == 'vote_operation': if not is_initial_sync: Accounts.dirty(op['author']) # lite - rep Accounts.dirty(op['voter']) # lite - stats CachedPost.vote(op['author'], op['permlink'], None, op['voter']) # misc ops elif op_type == 'transfer_operation': Payments.op_transfer(op, tx_idx, num, date) elif op_type == 'custom_json_operation': json_ops.append(op) Accounts.register(account_names, date) # register any new names CustomOp.process_ops(json_ops, num, date) # follow/reblog/community ops return num
def _process(cls, block, is_initial_sync=False): """Process a single block. Assumes a trx is open.""" # pylint: disable=too-many-boolean-expressions,too-many-branches num = cls._push(block) date = block['timestamp'] account_names = set() comment_ops = [] json_ops = [] delete_ops = [] voted_authors = set() for tx_idx, tx in enumerate(block['transactions']): for operation in tx['operations']: if isinstance(operation, dict): op_type = operation['type'].split('_operation')[0] op = operation['value'] else: # pre-appbase-style. remove after deploy. #APPBASE op_type, op = operation # account ops if op_type == 'pow': account_names.add(op['worker_account']) elif op_type == 'pow2': # old style. remove after #APPBASE #account_names.add(op['work'][1]['input']['worker_account']) account_names.add( op['work']['value']['input']['worker_account']) elif op_type == 'account_create': account_names.add(op['new_account_name']) elif op_type == 'account_create_with_delegation': account_names.add(op['new_account_name']) # post ops elif op_type == 'comment': comment_ops.append(op) elif op_type == 'delete_comment': delete_ops.append(op) elif op_type == 'vote': if not is_initial_sync: CachedPost.vote(op['author'], op['permlink']) voted_authors.add( op['author']) # TODO: move to cachedpost # misc ops elif op_type == 'transfer': Payments.op_transfer(op, tx_idx, num, date) elif op_type == 'custom_json': json_ops.append(op) Accounts.register(account_names, date) # register any new names Accounts.dirty(voted_authors) # update rep of voted authors Posts.comment_ops(comment_ops, date) # handle inserts, edits Posts.delete_ops(delete_ops) # handle post deletion CustomOp.process_ops(json_ops, num, date) # follow/reblog/community ops return num
def close_own_db_access(cls):
    """Close the private db access held by every indexer sub-module."""
    # one identical call per unit; keep the same order as setup
    units = (PostDataCache, Reputations, Votes, Follow, Posts,
             Reblog, Notify, Accounts, PayoutStats, Mentions)
    for unit in units:
        unit.close_own_db_access()
def setup_own_db_access(cls, sharedDbAdapter):
    """Give each indexer sub-module its own db access over the shared adapter."""
    units = (PostDataCache, Reputations, Votes, Follow, Posts,
             Reblog, Notify, Accounts, PayoutStats, Mentions)
    for unit in units:
        # each unit registers under its own class name (e.g. "Posts")
        unit.setup_own_db_access(sharedDbAdapter, unit.__name__)
def _process(cls, block, is_initial_sync=False):
    """Process a single block. Assumes a trx is open.

    :param block: appbase-format block dict ('timestamp', 'transactions')
    :param is_initial_sync: when True, skip the CachedPost vote update
    :return: block number as assigned by `cls._push`
    """
    num = cls._push(block)
    date = block['timestamp']

    account_names = set()
    comment_ops = []
    json_ops = []
    delete_ops = []
    for tx_idx, tx in enumerate(block['transactions']):
        for operation in tx['operations']:
            # appbase-style op: {'type': 'x_operation', 'value': {...}}
            op_type = operation['type']
            op = operation['value']

            # account ops -- collect names of potentially new accounts
            if op_type == 'pow_operation':
                account_names.add(op['worker_account'])
            elif op_type == 'pow2_operation':
                account_names.add(
                    op['work']['value']['input']['worker_account'])
            elif op_type == 'account_create_operation':
                account_names.add(op['new_account_name'])
            elif op_type == 'account_create_with_delegation_operation':
                account_names.add(op['new_account_name'])
            elif op_type == 'create_claimed_account_operation':
                account_names.add(op['new_account_name'])

            # post ops -- batched and flushed after the loop
            elif op_type == 'comment_operation':
                comment_ops.append(op)
            elif op_type == 'delete_comment_operation':
                delete_ops.append(op)
            elif op_type == 'vote_operation':
                if not is_initial_sync:
                    CachedPost.vote(op['author'], op['permlink'])

            # misc ops
            elif op_type == 'transfer_operation':
                Payments.op_transfer(op, tx_idx, num, date)
            elif op_type == 'custom_json_operation':
                json_ops.append(op)

    Accounts.register(account_names, date)  # register any new names
    Posts.comment_ops(comment_ops, date)  # handle inserts, edits
    Posts.delete_ops(delete_ops)  # handle post deletion
    CustomOp.process_ops(json_ops, num, date)  # follow/reblog/community ops

    return num
def reblog(cls, account, op_json, block_date):
    """Handle legacy 'reblog' op.

    :param account: the posting-auth account (must match op_json['account'])
    :param op_json: dict with 'account', 'author', 'permlink' and an
                    optional 'delete' key to undo a reblog
    :param block_date: block timestamp for created_at bookkeeping
    """
    blogger = op_json['account']
    author = op_json['author']
    permlink = op_json['permlink']
    if blogger != account:
        return  # impersonation
    if not all(map(Accounts.exists, [author, blogger])):
        return

    post_id, depth = Posts.get_id_and_depth(author, permlink)
    if depth > 0:
        return  # prevent comment reblogs
    if not post_id:
        log.debug("reblog: post not found: %s/%s", author, permlink)
        return

    if 'delete' in op_json and op_json['delete'] == 'delete':
        # NOTE(review): `DELETE ... LIMIT 1` is MySQL syntax, while the
        # insert below uses Postgres `ON CONFLICT` -- confirm the target
        # DB accepts both; stock Postgres rejects LIMIT on DELETE.
        DB.query("DELETE FROM hive_reblogs WHERE account = :a AND "
                 "post_id = :pid LIMIT 1", a=blogger, pid=post_id)
        if not DbState.is_initial_sync():
            FeedCache.delete(post_id, Accounts.get_id(blogger))
    else:
        sql = ("INSERT INTO hive_reblogs (account, post_id, created_at) "
               "VALUES (:a, :pid, :date) ON CONFLICT (account, post_id) DO NOTHING")
        DB.query(sql, a=blogger, pid=post_id, date=block_date)
        if not DbState.is_initial_sync():
            FeedCache.insert(post_id, Accounts.get_id(blogger), block_date)
def check_ad_payment(cls, op, date, num):
    """Triggers an adFund operation for validated Native Ads transfers.

    :param op: transfer op body ('memo', 'amount', 'to', 'from')
    :param date: block timestamp, used for the failure notification
    :param num: block num, forwarded to NativeAdOp
    """
    # Must exist before the try-block: the except handler references it,
    # and an assert could otherwise fire before it was ever assigned,
    # raising UnboundLocalError and masking the real failure.
    _account_id = None
    memo = op['memo']
    try:
        payment = cls._valid_payment(memo)
        if payment:
            amount, token = parse_amount(op['amount'], bypass_nai_lookup=True)
            params = {
                'amount': amount,
                'token': token,
                'to_account': op['to'],
                'community_name': payment['community_name']
            }

            # local imports, as in the original (presumably to avoid
            # circular module dependencies -- confirm)
            from hive.indexer.accounts import Accounts
            from hive.indexer.posts import Posts

            # resolve the payer id first so a 'post not found' failure
            # still notifies the right account
            _account_id = Accounts.get_id(op['from'])
            _post_id = Posts.get_id(op['from'], payment['permlink'])
            assert _post_id, 'post not found: @%s/%s' % (
                op['from'], payment['permlink'])

            _community_id = payment['community_id']
            ad_op = NativeAdOp(_community_id, _post_id, _account_id, {
                'action': 'adFund',
                'params': params
            }, num)

            ad_op.validate_op()
            ad_op.process()
    except AssertionError as e:
        payload = str(e)
        # NOTE(review): dst_id may still be None if validation fails before
        # the payer id is resolved -- confirm Notify accepts a null dst_id
        Notify('error', dst_id=_account_id, when=date, payload=payload).write()
def _validated(cls, op, tx_idx, num, date):
    """Validate and normalize the transfer op."""
    # pylint: disable=unused-argument
    if op['to'] != 'null':
        return None  # only payments to @null are promotions
    amount, token = parse_amount(op['amount'])
    if token != 'SBD':
        return None  # promotions are SBD-only
    url = op['memo']
    if not cls._validate_url(url):
        log.debug("invalid url: %s", url)
        return None
    author, permlink = cls._split_url(url)
    if not Accounts.exists(author):
        return None
    post_id = Posts.get_id(author, permlink)
    if not post_id:
        log.debug("post does not exist: %s", url)
        return None

    # normalized promotion record, ready for insert
    record = dict(id=None,
                  block_num=num,
                  tx_idx=tx_idx,
                  post_id=post_id,
                  from_account=Accounts.get_id(op['from']),
                  to_account=Accounts.get_id(op['to']),
                  amount=amount,
                  token=token)
    return record
def _validated(cls, op, tx_idx, num, date):
    """Validate and normalize the transfer op.

    Returns a normalized promotion-payment dict for a valid SBD payment
    to @null whose memo names an existing post, else None.
    """
    # use logging instead of print so output honors the app's log config
    # (consistent with the sibling implementation that uses log.debug)
    import logging
    log = logging.getLogger(__name__)

    if op['to'] != 'null':
        return  # only care about payments to null
    amount, token = parse_amount(op['amount'])
    if token != 'SBD':
        return  # only care about SBD payments
    url = op['memo']
    if not cls._validate_url(url):
        log.debug("invalid url: %s", url)
        return  # invalid url
    author, permlink = cls._split_url(url)
    if not Accounts.exists(author):
        return
    post_id = Posts.get_id(author, permlink)
    if not post_id:
        log.debug("post does not exist: %s", url)
        return

    return {
        'id': None,
        'block_num': num,
        'tx_idx': tx_idx,
        'post_id': post_id,
        'from_account': Accounts.get_id(op['from']),
        'to_account': Accounts.get_id(op['to']),
        'amount': amount,
        'token': token
    }
def _select_missing_tuples(cls, last_cached_id, limit=1000000):
    """Return tuples for posts above `last_cached_id` not yet cached."""
    from hive.indexer.posts import Posts
    sql = """SELECT id, author, permlink, promoted
               FROM hive_posts
              WHERE is_deleted = '0' AND id > :id
           ORDER BY id LIMIT :limit"""
    rows = DB.query_all(sql, id=last_cached_id, limit=limit)
    return Posts.save_ids_from_tuples(rows)
def _process(cls, block, is_initial_sync=False):
    """Process a single block (pre-appbase [type, value] op format).

    :param block: block dict ('timestamp', 'transactions')
    :param is_initial_sync: when True, skip CachedPost vote updates and
                            voted-author tracking (bulk catch-up)
    :return: block number as assigned by `cls._push`
    """
    num = cls._push(block)
    date = block['timestamp']

    account_names = set()
    comment_ops = []
    json_ops = []
    delete_ops = []
    voted_authors = set()
    for tx_idx, tx in enumerate(block['transactions']):
        for operation in tx['operations']:
            # legacy op shape: [op_type, op_body]
            op_type, op = operation

            # account ops -- collect names of potentially new accounts
            if op_type == 'pow':
                account_names.add(op['worker_account'])
            elif op_type == 'pow2':
                account_names.add(op['work'][1]['input']['worker_account'])
            elif op_type == 'account_create':
                account_names.add(op['new_account_name'])
            elif op_type == 'account_create_with_delegation':
                account_names.add(op['new_account_name'])

            # post ops -- batched and flushed after the loop
            elif op_type == 'comment':
                comment_ops.append(op)
            elif op_type == 'delete_comment':
                delete_ops.append(op)
            elif op_type == 'vote':
                if not is_initial_sync:
                    CachedPost.vote(op['author'], op['permlink'])
                    voted_authors.add(op['author'])  # TODO: move to cachedpost

            # misc ops
            elif op_type == 'transfer':
                Payments.op_transfer(op, tx_idx, num, date)
            elif op_type == 'custom_json':
                json_ops.append(op)

    Accounts.register(account_names, date)  # register any new names
    Accounts.dirty(voted_authors)  # update rep of voted authors
    Posts.comment_ops(comment_ops, date)  # handle inserts, edits
    Posts.delete_ops(delete_ops)  # handle post deletion
    CustomOp.process_ops(json_ops, num, date)  # follow/reblog/community ops

    return num
def listen_steemd(trail_blocks=2):
    """Live-sync loop: follow the steemd head one block at a time.

    :param trail_blocks: stay this many blocks behind head to reduce the
                         chance of processing a forked block (0 disables
                         trailing entirely)
    """
    steemd = get_adapter()
    curr_block = db_last_block()
    last_hash = False

    while True:
        curr_block = curr_block + 1

        # if trailing too close, take a pause
        while trail_blocks > 0:
            if curr_block <= steemd.head_block() - trail_blocks:
                break
            time.sleep(0.5)

        # get the target block; if DNE, pause and retry
        block = steemd.get_block(curr_block)
        while not block:
            time.sleep(0.5)
            block = steemd.get_block(curr_block)

        # block num is encoded in the first 4 bytes of the block id
        num = int(block['block_id'][:8], base=16)
        print("[LIVE] Got block {} at {} with {} txs -- ".format(
            num, block['timestamp'], len(block['transactions'])), end='')

        # ensure the block we received links to our last
        if last_hash and last_hash != block['previous']:
            # this condition is very rare unless trail_blocks is 0 and fork is
            # encountered; to handle gracefully, implement a pop_block method
            raise Exception("Unlinkable block: have {}, got {} -> {})".format(
                last_hash, block['previous'], block['block_id']))
        last_hash = block['block_id']

        start_time = time.perf_counter()
        query("START TRANSACTION")

        # process ops, then refresh the cache for edited + paid-out posts
        dirty = process_block(block)
        update_posts_batch(Posts.urls_to_tuples(dirty), steemd,
                           block['timestamp'])

        paidout = select_paidout_posts(block['timestamp'])
        update_posts_batch(paidout, steemd, block['timestamp'])

        Accounts.cache_dirty()
        Accounts.cache_dirty_follows()

        print("{} edits, {} payouts".format(len(dirty), len(paidout)))
        query("COMMIT")

        secs = time.perf_counter() - start_time
        if secs > 1:
            print("WARNING: block {} process took {}s".format(num, secs))

        # approx once per hour, update accounts
        if num % 1200 == 0:
            print("Performing account maintenance...")
            Accounts.cache_old()
            Accounts.update_ranks()
def _load_noids(cls):
    """Resolve and record ids for queued posts whose ids are unknown."""
    from hive.indexer.posts import Posts
    pending = cls._noids - set(cls._ids.keys())
    pairs = [(Posts.get_id(*url.split('/')), url) for url in pending]
    for pid, url in pairs:
        assert pid, "WARNING: missing id for %s" % url
        cls._ids[url] = pid
    cls._noids = set()
    return len(pairs)
def _select_paidout_tuples(cls, date):
    """Return (id, author, permlink) for posts due for payout by `date`."""
    from hive.indexer.posts import Posts

    sql = """SELECT post_id FROM hive_posts_cache
              WHERE is_paidout = '0' AND payout_at <= :date"""
    post_ids = DB.query_col(sql, date=date)
    if not post_ids:
        return []

    sql = """SELECT id, author, permlink FROM hive_posts
              WHERE id IN :ids"""
    rows = DB.query_all(sql, ids=tuple(post_ids))
    return Posts.save_ids_from_tuples(rows)
def _read_permlink(self):
    """Load and validate the op's permlink; sets self.permlink/self.post_id."""
    assert self.account, 'permlink requires named account'
    permlink = read_key_str(self.op, 'permlink', 256)
    assert permlink, 'must name a permlink'

    from hive.indexer.posts import Posts
    post_id = Posts.get_id(self.account, permlink)
    assert post_id, 'invalid post: %s/%s' % (self.account, permlink)

    # the post must belong to this op's community
    community_id = DB.query_one(
        """SELECT community_id FROM hive_posts WHERE id = :id LIMIT 1""",
        id=post_id)
    assert self.community_id == community_id, 'post does not belong to community'

    self.permlink = permlink
    self.post_id = post_id
def audit_cache_undelete(db, steem):
    """Scan all posts to check for posts erroneously deleted.

    :param db: database adapter with `query_all`
    :param steem: steemd adapter with `get_content_batch`
    """
    last_id = _last_post_id(db)
    step = 1000000  # posts checked per batch
    steps = int(last_id / step) + 1
    log.info("last post id: %d, batches: %d", last_id, steps)

    sql = """
        SELECT id, author, permlink
          FROM hive_posts
         WHERE is_deleted = True
           AND id BETWEEN :lbound AND :ubound
    """

    for idx in range(steps):
        lbound = (idx * step) + 1
        ubound = (idx + 1) * step

        rows = db.query_all(sql, lbound=lbound, ubound=ubound)
        log.info("%d <= id <= %d: %d to check", lbound, ubound, len(rows))
        if not rows:
            continue

        post_args = [(row['author'], row['permlink']) for row in rows]
        posts = steem.get_content_batch(post_args)

        recovered = 0
        for row, post in zip(rows, posts):
            if post['author']:
                # content still exists on chain -> the delete flag was wrong
                recovered += 1
                Posts.undelete(post, post['created'], row['id'])

        log.info("%d <= id <= %d: %d recovered", lbound, ubound, recovered)
        if recovered:
            CachedPost.flush(steem, trx=True)
def _load_noids(cls):
    """Fill in ids for posts queued without one.

    Marking a post dirty does not require an id, since a later call may
    supply it for free. This method resolves whatever ids are still
    unknown and must run before changes are flushed.
    """
    from hive.indexer.posts import Posts

    missing = cls._noids.difference(cls._ids)
    resolved = [(Posts.get_id(*url.split('/')), url) for url in missing]
    for post_id, url in resolved:
        assert post_id, "WARNING: missing id for %s" % url
        cls._ids[url] = post_id

    cls._noids = set()
    return len(resolved)
def dirty_missing(cls, limit=1000000):
    """Queue cache inserts for any posts missing from the cache."""
    from hive.indexer.posts import Posts

    # cached posts are inserted sequentially, so comparing MAX(id)'s
    # tells us how many posts the cache is behind
    cached_max = cls.last_id()
    gap = Posts.last_id() - cached_max

    if gap:
        for pid, author, permlink, promoted in \
                cls._select_missing_tuples(cached_max, limit):
            if promoted > 0:
                # ensure we don't miss promote amount
                cls.update_promoted_amount(pid, promoted)
            cls._dirty('insert', author, permlink, pid)

    return gap
def sync_from_steemd(is_initial_sync):
    """Batch-sync blocks from db head up to steemd's last irreversible.

    :param is_initial_sync: when True, skip the post-cache catch-up pass
                            after the block sync completes
    """
    steemd = get_adapter()
    dirty = set()

    lbound = db_last_block() + 1
    ubound = steemd.last_irreversible_block_num()

    print("[SYNC] {} blocks to batch sync".format(ubound - lbound + 1))
    print("[SYNC] start sync from block %d" % lbound)

    while lbound < ubound:
        # fetch and process in chunks of up to 1000 blocks
        to = min(lbound + 1000, ubound)

        lap_0 = time.perf_counter()
        blocks = steemd.get_blocks_range(lbound, to)
        lap_1 = time.perf_counter()
        dirty |= process_blocks(blocks, is_initial_sync)
        lap_2 = time.perf_counter()

        rate = (to - lbound) / (lap_2 - lap_0)
        rps = int((to - lbound) / (lap_1 - lap_0))  # read (fetch) rate
        wps = int((to - lbound) / (lap_2 - lap_1))  # write (process) rate
        print("[SYNC] Got block {} ({}/s, {}rps {}wps) -- {}m remaining".format(
            to - 1, round(rate, 1), rps, wps,
            round((ubound - to) / rate / 60, 2)))

        lbound = to

    # batch update post cache after catching up to head block
    if not is_initial_sync:

        print("[PREP] Update {} edited posts".format(len(dirty)))
        update_posts_batch(Posts.urls_to_tuples(dirty), steemd)

        date = steemd.head_time()
        paidout = select_paidout_posts(date)
        print("[PREP] Process {} payouts since {}".format(len(paidout), date))
        update_posts_batch(paidout, steemd, date)
def _select_paidout_tuples(cls, date):
    """Sweep hive_posts_cache for overdue payouts.

    Selects every post whose payout was due before `date` but whose
    `is_paidout` flag is still unset. The sweep keeps final payout state
    accurate: payout values drift between votes, so waiting for incoming
    votes alone would leave stale rows behind.
    """
    from hive.indexer.posts import Posts

    overdue_sql = """SELECT post_id FROM hive_posts_cache
                      WHERE is_paidout = '0' AND payout_at <= :date"""
    overdue = DB.query_col(overdue_sql, date=date)
    if not overdue:
        return []

    detail_sql = """SELECT id, author, permlink FROM hive_posts
                     WHERE id IN :ids"""
    return Posts.save_ids_from_tuples(
        DB.query_all(detail_sql, ids=tuple(overdue)))
def _process(cls, block):
    """Process a single block. Assumes a trx is open.

    :param block: a `Block` subclass instance (iterated via
                  get_next_transaction / get_next_operation)
    :return: block number as assigned by `cls._push`
    """
    #pylint: disable=too-many-branches
    assert issubclass(type(block), Block)
    num = cls._push(block)
    cls._current_block_date = block.get_date()

    # head block date shall point to last imported block (not yet current one) to conform hived behavior.
    # that's why operations processed by node are included in the block being currently produced, so its processing time is equal to last produced block.
    # unfortunately it is not true to all operations, most likely in case of dates that used to come from
    # FatNode where it supplemented it with its-current head block, since it was already past block processing,
    # it saw later block (equal to _current_block_date here)
    if cls._head_block_date is None:
        cls._head_block_date = cls._current_block_date

    # pre-compute virtual-op effects; yields delete ops that had no effect
    ineffective_deleted_ops = Blocks.prepare_vops(
        Posts.comment_payout_ops, block, cls._current_block_date, num,
        num <= cls._last_safe_cashout_block)

    json_ops = []
    for transaction in block.get_next_transaction():
        assert issubclass(type(transaction), Transaction)
        for operation in transaction.get_next_operation():
            assert issubclass(type(operation), Operation)
            start = OPSM.start()  # per-op-type timing stats
            op_type = operation.get_type()
            assert op_type, "Only supported types are expected"
            op = operation.get_body()

            assert 'block_num' not in op
            op['block_num'] = num

            account_name = None
            op_details = None
            potentially_new_account = False
            # account ops -- extract the possibly-new account name
            if op_type == OperationType.Pow:
                account_name = op['worker_account']
                potentially_new_account = True
            elif op_type == OperationType.Pow2:
                account_name = op['work']['value']['input'][
                    'worker_account']
                potentially_new_account = True
            elif op_type == OperationType.AccountCreate:
                account_name = op['new_account_name']
                op_details = op
                potentially_new_account = True
            elif op_type == OperationType.AccountCreateWithDelegation:
                account_name = op['new_account_name']
                op_details = op
                potentially_new_account = True
            elif op_type == OperationType.CreateClaimedAccount:
                account_name = op['new_account_name']
                op_details = op
                potentially_new_account = True

            if potentially_new_account and not Accounts.register(
                    account_name, op_details, cls._head_block_date, num):
                log.error(
                    "Failed to register account {} from operation: {}".
                    format(account_name, op))

            # account metadata updates
            if op_type == OperationType.AccountUpdate:
                Accounts.update_op(op, False)
            elif op_type == OperationType.AccountUpdate2:
                Accounts.update_op(op, True)

            # post ops
            elif op_type == OperationType.Comment:
                Posts.comment_op(op, cls._head_block_date)
            elif op_type == OperationType.DeleteComment:
                # skip deletes that vops proved ineffective
                key = "{}/{}".format(op['author'], op['permlink'])
                if key not in ineffective_deleted_ops:
                    Posts.delete_op(op, cls._head_block_date)
            elif op_type == OperationType.CommentOption:
                Posts.comment_options_op(op)
            elif op_type == OperationType.Vote:
                Votes.vote_op(op, cls._head_block_date)

            # misc ops
            elif op_type == OperationType.Transfer:
                Payments.op_transfer(op, transaction.get_id(), num,
                                     cls._head_block_date)
            elif op_type == OperationType.CustomJson:
                # follow/reblog/community ops
                CustomOp.process_op(op, num, cls._head_block_date)

            OPSM.op_stats(str(op_type), OPSM.stop(start))

    # advance head date only after the whole block is processed
    cls._head_block_date = cls._current_block_date

    return num
def process_json_community_op(account, op_json, date):
    """Validates community op and apply state changes to db.

    NOTE(review): this is draft code -- most actions below only assert
    their inputs; the SQL they should run is still commented out.

    :param account: posting-auth account submitting the op
    :param op_json: [cmd_name, cmd_op] pair
    :param date: block timestamp
    :return: True when the op passed validation (state writes are TODO)
    """
    #pylint: disable=line-too-long
    cmd_name, cmd_op = op_json  # ['flagPost', {community: '', author: '', ...}]

    commands = list(flatten(PERMISSIONS.values()))
    if cmd_name not in commands:
        return

    print("community op from {} @ {} -- {}".format(account, date, op_json))
    community = cmd_op['community']
    community_exists = is_community(community)

    # special case: community creation. TODO: does this require ACTIVE auth? or POSTING will suffice?
    if cmd_name == 'create' and not community_exists:
        if account != community:  # only the OWNER may create
            return

        ctype = cmd_op['type']  # restricted, open-comment, public

        # INSERT INTO hive_communities (account, name, about, description, lang, is_nsfw, is_private, created_at)
        # VALUES ('%s', '%s', '%s', '%s', '%s', %d, %d, '%s')" % [account, name, about, description, lang, is_nsfw ? 1 : 0, is_private ? 1 : 0, block_date]
        # INSERT ADMINS---

    # validate permissions
    if not community_exists or not is_permitted(account, community, cmd_name):
        return

    # If command references a post, ensure it's valid
    # NOTE(review): this bails for ANY command without a valid post ref,
    # including account-only commands -- confirm that's intended
    post_id, depth = Posts.get_id_and_depth(cmd_op.get('author'),
                                            cmd_op.get('permlink'))
    if not post_id:
        return

    # If command references an account, ensure it's valid
    account_id = Accounts.get_id(cmd_op.get('account'))

    # If command references a list of accounts, ensure they are valid
    account_ids = list(map(Accounts.get_id, cmd_op.get('accounts')))

    # ADMIN Actions
    # -------------
    if cmd_name == 'add_admins':
        assert account_ids
        # UPDATE hive_members SET is_admin = 1 WHERE account IN (%s) AND community = '%s'

    if cmd_name == 'remove_admins':
        assert account_ids
        # todo: validate at least one admin remains!!!
        # UPDATE hive_members SET is_admin = 0 WHERE account IN (%s) AND community = '%s'

    if cmd_name == 'add_mods':
        assert account_ids
        # UPDATE hive_members SET is_mod = 1 WHERE account IN (%s) AND community = '%s'

    if cmd_name == 'remove_mods':
        assert account_ids
        # UPDATE hive_members SET is_mod = 0 WHERE account IN (%s) AND community = '%s'

    # MOD USER Actions
    # ----------------
    if cmd_name == 'update_settings':
        # name, about, description, lang, is_nsfw
        # settings {bg_color, bg_color2, text_color}
        # UPDATE hive_communities SET .... WHERE community = '%s'
        assert account_id

    if cmd_name == 'add_posters':
        assert account_ids
        # UPDATE hive_members SET is_approved = 1 WHERE account IN (%s) AND community = '%s'

    if cmd_name == 'remove_posters':
        assert account_ids
        # UPDATE hive_members SET is_approved = 0 WHERE account IN (%s) AND community = '%s'

    if cmd_name == 'mute_user':
        assert account_id
        # UPDATE hive_members SET is_muted = 1 WHERE account = '%s' AND community = '%s'

    if cmd_name == 'unmute_user':
        assert account_id
        # UPDATE hive_members SET is_muted = 0 WHERE account = '%s' AND community = '%s'

    if cmd_name == 'set_user_title':
        assert account_id
        # UPDATE hive_members SET title = '%s' WHERE account = '%s' AND community = '%s'

    # MOD POST Actions
    # ----------------
    if cmd_name == 'mute_post':
        assert post_id
        # assert all([account_id, post_id])
        # UPDATE hive_posts SET is_muted = 1 WHERE community = '%s' AND author = '%s' AND permlink = '%s'

    if cmd_name == 'unmute_post':
        assert post_id
        # UPDATE hive_posts SET is_muted = 0 WHERE community = '%s' AND author = '%s' AND permlink = '%s'

    if cmd_name == 'pin_post':
        assert post_id
        # UPDATE hive_posts SET is_pinned = 1 WHERE community = '%s' AND author = '%s' AND permlink = '%s'

    if cmd_name == 'unpin_post':
        assert post_id
        # UPDATE hive_posts SET is_pinned = 0 WHERE community = '%s' AND author = '%s' AND permlink = '%s'

    # GUEST POST Actions
    # ------------------
    if cmd_name == 'flag_post':
        assert post_id
        # INSERT INTO hive_flags (account, community, author, permlink, comment, created_at) VALUES ()

    # track success (TODO: failures as well?)
    # INSERT INTO hive_modlog (account, community, action, created_at) VALUES (account, community, json.inspect, block_date)
    return True
def process_block(block, is_initial_sync=False):
    """Insert the block row, dispatch its ops, return modified post urls.

    :param block: legacy block dict ('timestamp', 'block_id', 'previous',
                  'transactions'; ops as [type, value] pairs)
    :param is_initial_sync: currently unused here -- TODO confirm
    :return: set of 'author/permlink' urls touched this block
    """
    date = block['timestamp']
    block_id = block['block_id']
    prev = block['previous']
    # block num is encoded in the first 4 bytes of the block id
    block_num = int(block_id[:8], base=16)
    txs = block['transactions']

    query("INSERT INTO hive_blocks (num, hash, prev, txs, created_at) "
          "VALUES (:num, :hash, :prev, :txs, :date)",
          num=block_num, hash=block_id, prev=prev, txs=len(txs), date=date)

    accounts = set()
    comments = []
    json_ops = []
    deleted = []
    dirty = set()
    for tx in txs:
        for operation in tx['operations']:
            op_type, op = operation

            if op_type == 'pow':
                accounts.add(op['worker_account'])
            elif op_type == 'pow2':
                accounts.add(op['work'][1]['input']['worker_account'])
            elif op_type in ['account_create',
                             'account_create_with_delegation']:
                accounts.add(op['new_account_name'])
            elif op_type == 'comment':
                comments.append(op)
                dirty.add(op['author'] + '/' + op['permlink'])
                Accounts.dirty(op['author'])
                if op['parent_author']:
                    Accounts.dirty(op['parent_author'])
            elif op_type == 'delete_comment':
                deleted.append(op)
            elif op_type == 'custom_json':
                json_ops.append(op)
            elif op_type == 'vote':
                dirty.add(op['author'] + '/' + op['permlink'])
                Accounts.dirty(op['author'])
                Accounts.dirty(op['voter'])

    Accounts.register(accounts, date)  # if an account does not exist, mark it as created in this block
    Posts.register(comments, date)  # if this is a new post, add the entry and validate community param
    Posts.delete(deleted)  # mark hive_posts.is_deleted = 1

    for op in json_ops:
        if op['id'] not in ['follow', 'com.steemit.community']:
            continue

        # we are assuming `required_posting_auths` is always used and length 1.
        # it may be that some ops will require `required_active_auths` instead
        # (e.g. if we use that route for admin action of acct creation)
        # if op['required_active_auths']:
        #    log.warning("unexpected active auths: %s" % op)
        if len(op['required_posting_auths']) != 1:
            log.warning("unexpected auths: %s" % op)
            continue

        account = op['required_posting_auths'][0]
        op_json = load_json_key(op, 'json')

        if op['id'] == 'follow':
            if block_num < 6000000 and type(op_json) != list:
                op_json = ['follow', op_json]  # legacy compat
            process_json_follow_op(account, op_json, date)
        elif op['id'] == 'com.steemit.community':
            # community ops only start being processed past block 13M
            if block_num > 13e6:
                process_json_community_op(account, op_json, date)

    # return all posts modified this block
    return dirty
def generate_cached_post_sql(pid, post, updated_at):
    """Build the SQL statements that refresh one post's cache state.

    :param pid: hive_posts id of the post
    :param post: full post dict as returned by steemd content calls
    :param updated_at: timestamp to record for this cache refresh
    :return: list of (sql, params) tuples (cache upsert + tag refresh)
    :raises Exception: if the post has no chain state (empty author)
    """
    if not post['author']:
        raise Exception("ERROR: post id {} has no chain state.".format(pid))

    md = None
    try:
        md = json.loads(post['json_metadata'])
        if not isinstance(md, dict):
            md = {}
    except json.decoder.JSONDecodeError:
        pass  # invalid metadata is simply ignored

    thumb_url = ''
    if md and 'image' in md:
        thumb_url = safe_img_url(first(md['image'])) or ''
        md['image'] = [thumb_url]

    # clean up tags, check if nsfw
    tags = [post['category']]
    if md and 'tags' in md and isinstance(md['tags'], list):
        tags = tags + md['tags']
    # normalize to <=32 chars, lowercase, max 5 tags
    tags = set(list(map(lambda tag: (str(tag) or '').strip('# ').lower()[:32],
                        tags))[0:5])
    tags.discard('')
    is_nsfw = int('nsfw' in tags)

    # payout date is last_payout if paid, and cashout_time if pending.
    is_paidout = (post['cashout_time'][0:4] == '1969')
    payout_at = post['last_payout'] if is_paidout else post['cashout_time']

    # get total rshares, and create comma-separated vote data blob
    rshares = sum(int(v['rshares']) for v in post['active_votes'])
    csvotes = "\n".join(map(vote_csv_row, post['active_votes']))

    # payout is declined if max_accepted_payout is 0, or if 100% is
    # burned via a sole 'null' beneficiary
    payout_declined = False
    if amount(post['max_accepted_payout']) == 0:
        payout_declined = True
    elif len(post['beneficiaries']) == 1:
        benny = first(post['beneficiaries'])
        if benny['account'] == 'null' and int(benny['weight']) == 10000:
            payout_declined = True

    # 100% steem power payout?
    full_power = int(post['percent_steem_dollars']) == 0

    # total payout (completed and/or pending)
    payout = sum([
        amount(post['total_payout_value']),
        amount(post['curator_payout_value']),
        amount(post['pending_payout_value']),
    ])

    # total promotion cost
    promoted = amount(post['promoted'])

    # trending scores
    timestamp = parse_time(post['created']).timestamp()
    hot_score = score(rshares, timestamp, 10000)
    trend_score = score(rshares, timestamp, 480000)

    # NUL bytes are not storable; replace the whole body
    if post['body'].find('\x00') > -1:
        print("bad body: {}".format(post['body']))
        post['body'] = "INVALID"

    # clamp to SMALLINT range
    children = post['children']
    if children > 32767:
        children = 32767

    stats = Posts.get_post_stats(post)

    values = collections.OrderedDict([
        ('post_id', '%d' % pid),
        ('author', "%s" % post['author']),
        ('permlink', "%s" % post['permlink']),
        ('category', "%s" % post['category']),
        ('depth', "%d" % post['depth']),
        ('children', "%d" % children),
        ('title', "%s" % post['title']),
        ('preview', "%s" % post['body'][0:1024]),
        ('body', "%s" % post['body']),
        ('img_url', "%s" % thumb_url),
        ('payout', "%f" % payout),
        ('promoted', "%f" % promoted),
        ('payout_at', "%s" % payout_at),
        ('updated_at', "%s" % updated_at),
        ('created_at', "%s" % post['created']),
        ('rshares', "%d" % rshares),
        ('votes', "%s" % csvotes),
        ('json', "%s" % json.dumps(md)),
        ('is_nsfw', "%d" % is_nsfw),
        ('is_paidout', "%d" % is_paidout),
        ('sc_trend', "%f" % trend_score),
        ('sc_hot', "%f" % hot_score),
        ('flag_weight', "%f" % stats['flag_weight']),
        ('total_votes', "%d" % stats['total_votes']),
        ('up_votes', "%d" % stats['up_votes']),
        ('is_hidden', "%d" % stats['hide']),
        ('is_grayed', "%d" % stats['gray']),
        ('author_rep', "%f" % stats['author_rep']),
        ('raw_json', "%s" % json.dumps(post)),  # TODO: remove body, json_md, active_votes(?)
        ('is_declined', "%d" % int(payout_declined)),
        ('is_full_power', "%d" % int(full_power)),
    ])
    fields = values.keys()

    # Multiple SQL statements are generated for each post
    sqls = []

    # Update main metadata in the hive_posts_cache table
    cols = ', '.join(fields)
    params = ', '.join([':'+k for k in fields])
    update = ', '.join([k+" = :"+k for k in fields][1:])
    sql = "INSERT INTO hive_posts_cache (%s) VALUES (%s) ON CONFLICT (post_id) DO UPDATE SET %s"
    sqls.append((sql % (cols, params, update), values))

    # update tag metadata only for top-level posts
    if not post['parent_author']:
        sql = "DELETE FROM hive_post_tags WHERE post_id = :id"
        sqls.append((sql, {'id': pid}))

        if tags:
            sql = "INSERT INTO hive_post_tags (post_id, tag) VALUES "
            params = {}
            vals = []
            for i, tag in enumerate(tags):
                vals.append("(:id, :t%d)" % i)
                params["t%d"%i] = tag
            sqls.append((sql + ','.join(vals) + " ON CONFLICT DO NOTHING",
                         {'id': pid, **params}))

    return sqls
def process_json_follow_op(account, op_json, block_date):
    """ Process legacy 'follow' plugin ops (follow/mute/clear, reblog)

    :param account: posting-auth account; must match the actor named in
                    the payload (impersonation guard)
    :param op_json: ['follow'|'reblog', {payload...}]
    :param block_date: block timestamp for created_at bookkeeping
    """
    # payload shape guards: silently ignore malformed ops
    if type(op_json) != list:
        return
    if len(op_json) != 2:
        return
    if first(op_json) not in ['follow', 'reblog']:
        return
    if not isinstance(second(op_json), dict):
        return

    cmd, op_json = op_json  # ['follow', {data...}]
    if cmd == 'follow':
        if type(op_json['what']) != list:
            return
        # empty 'what' means clear the relationship
        what = first(op_json['what']) or 'clear'
        if what not in ['blog', 'clear', 'ignore']:
            return
        if not all([key in op_json for key in ['follower', 'following']]):
            print("bad follow op: {} {}".format(block_date, op_json))
            return

        follower = op_json['follower']
        following = op_json['following']

        if follower != account:
            return  # impersonation
        if not all(map(Accounts.exists, [follower, following])):
            return  # invalid input

        # NOTE(review): ON DUPLICATE KEY / INSERT IGNORE below are MySQL
        # syntax -- this version targets a MySQL backend
        sql = """
        INSERT INTO hive_follows (follower, following, created_at, state)
        VALUES (:fr, :fg, :at, :state)
        ON DUPLICATE KEY UPDATE state = :state
        """
        state = {'clear': 0, 'blog': 1, 'ignore': 2}[what]
        query(sql, fr=follower, fg=following, at=block_date, state=state)

        Accounts.dirty_follows(follower)
        Accounts.dirty_follows(following)

    elif cmd == 'reblog':
        blogger = op_json['account']
        author = op_json['author']
        permlink = op_json['permlink']

        if blogger != account:
            return  # impersonation
        if not all(map(Accounts.exists, [author, blogger])):
            return

        post_id, depth = Posts.get_id_and_depth(author, permlink)

        if depth > 0:
            return  # prevent comment reblogs

        if not post_id:
            print("reblog: post not found: {}/{}".format(author, permlink))
            return

        if 'delete' in op_json and op_json['delete'] == 'delete':
            # undo the reblog and drop it from the blogger's feed cache
            query("DELETE FROM hive_reblogs WHERE account = :a AND post_id = :pid LIMIT 1",
                  a=blogger, pid=post_id)
            sql = "DELETE FROM hive_feed_cache WHERE account = :account AND post_id = :id"
            query(sql, account=blogger, id=post_id)
        else:
            query("INSERT IGNORE INTO hive_reblogs (account, post_id, created_at) "
                  "VALUES (:a, :pid, :date)",
                  a=blogger, pid=post_id, date=block_date)
            sql = "INSERT IGNORE INTO hive_feed_cache (account, post_id, created_at) VALUES (:account, :id, :created_at)"
            query(sql, account=blogger, id=post_id, created_at=block_date)