def pids_by_replies_to_account(start_author: str, start_permlink: str = '',
                               limit: int = 20):
    """Get a list of post_ids representing replies to an author.

    Without `start_permlink`, `start_author` is the account being replied
    to and the newest replies are returned. With `start_permlink`, the pair
    identifies an existing reply: its parent's author becomes the account
    being replied to, and only replies created at or before that reply are
    returned.

    Returns an empty list when the `start_author`/`start_permlink` reply
    cannot be found (previously this raised TypeError on tuple unpacking).
    """
    seek = ''
    if start_permlink:
        sql = (" SELECT parent.author, child.created_at"
               " FROM hive_posts child"
               " JOIN hive_posts parent ON child.parent_id = parent.id"
               " WHERE child.author = :author"
               " AND child.permlink = :permlink ")
        row = query_row(sql, author=start_author, permlink=start_permlink)
        if not row:
            # unknown start reply: there is nothing to page from
            return []
        account, start_date = row
        seek = "AND created_at <= '%s'" % start_date
    else:
        account = start_author

    sql = (" SELECT id FROM hive_posts"
           " WHERE parent_id IN (SELECT id FROM hive_posts WHERE author = :parent)"
           " %s"
           " ORDER BY created_at DESC"
           " LIMIT :limit ") % seek
    return query_col(sql, parent=account, limit=limit)
async def db_head_state():
    """Describe the highest-numbered block stored in `hive_blocks`.

    Returns a dict with the block number (`db_head_block`), its creation
    time rendered with `str()` (`db_head_time`) and the difference between
    the local clock and that time, truncated to an int (`db_head_age`).
    """
    head = query_row(
        "SELECT num,created_at,extract(epoch from created_at) ts "
        "FROM hive_blocks ORDER BY num DESC LIMIT 1")
    block_num = head['num']
    block_time = str(head['created_at'])
    age = int(time.time() - head['ts'])
    return {
        'db_head_block': block_num,
        'db_head_time': block_time,
        'db_head_age': age,
    }
def _build_post(cls, op, date, pid=None):
    """Assemble the attribute dict for a post or comment operation.

    A top-level post (empty `parent_author`) has no parent, depth 0, takes
    its category from `parent_permlink` and its community from
    `cls._get_op_community`, falling back to the author. A comment copies
    category and community from its parent's `hive_posts` row and sits one
    level deeper than that parent.
    """
    if op['parent_author']:
        # comment: inherit placement from the parent post
        parent_id = cls.get_id(op['parent_author'], op['parent_permlink'])
        parent_row = query_row(
            "SELECT depth,category,community FROM hive_posts WHERE id=:id",
            id=parent_id)
        parent_depth, category, community = parent_row
        depth = parent_depth + 1
    else:
        # top-level post
        parent_id, depth = None, 0
        category = op['parent_permlink']
        community = cls._get_op_community(op) or op['author']

    # check post validity in specified context
    is_valid = is_community_post_valid(community, op)
    if not is_valid:
        print("Invalid post {} in @{}".format(
            "@{}/{}".format(op['author'], op['permlink']), community))

    return {
        'author': op['author'],
        'permlink': op['permlink'],
        'id': pid,
        'is_valid': is_valid,
        'parent_id': parent_id,
        'depth': depth,
        'category': category,
        'community': community,
        'date': date,
    }
async def get_replies_by_last_update(start_author: str,
                                     start_permlink: str = '',
                                     limit: int = 20):
    """Return replies made to an account's posts, newest first.

    Without `start_permlink`, `start_author` is the account being replied
    to. With it, the pair identifies an existing reply: that reply's parent
    author becomes the account being replied to and only replies created at
    or before it are returned.

    `limit` is passed through `_validate_limit(limit, 50)` (presumably an
    upper bound of 50 -- confirm against that helper).

    Returns an empty list when the start reply is unknown or when there are
    no replies. Previously the first case raised TypeError on tuple
    unpacking; the second handed `_get_posts` an empty id list, which the
    `_get_posts` shown alongside this function rejects with an exception.
    """
    limit = _validate_limit(limit, 50)
    parent = start_author
    seek = ''

    if start_permlink:
        row = query_row(
            " SELECT p.author, c.created_at"
            " FROM hive_posts c"
            " JOIN hive_posts p ON c.parent_id = p.id"
            " WHERE c.author = :a AND c.permlink = :p ",
            a=start_author, p=start_permlink)
        if not row:
            # unknown start reply: there is nothing to page from
            return []
        parent, start_date = row
        seek = "AND created_at <= '%s'" % start_date

    sql = (" SELECT id FROM hive_posts"
           " WHERE parent_id IN (SELECT id FROM hive_posts WHERE author = :parent)"
           " %s"
           " ORDER BY created_at DESC"
           " LIMIT :limit ") % seek

    ids = query_col(sql, parent=parent, limit=limit)
    if not ids:
        return []
    return _get_posts(ids)
def load_posts(ids, truncate_body=0):
    """Given an array of post ids, returns full objects in the same order.

    Rows are read from `hive_posts_cache` and converted with
    `_condenser_post_object`, to which `truncate_body` is forwarded.

    Ids with no cache row are logged and left out of the result. The
    caller's `ids` sequence is not modified, and a missing id that appears
    more than once is skipped each time instead of raising KeyError.
    """
    if not ids:
        return []

    sql = (" SELECT post_id, author, permlink, title, body, promoted, payout,"
           " created_at, payout_at, is_paidout, rshares, raw_json, category,"
           " depth, json, children, votes, author_rep, updated_at, preview,"
           " img_url, is_nsfw FROM hive_posts_cache WHERE post_id IN :ids ")

    # key by id so we can return sorted by input order
    posts_by_id = {}
    for row in query_all(sql, ids=tuple(ids)):
        row = dict(row)
        posts_by_id[row['post_id']] = _condenser_post_object(
            row, truncate_body=truncate_body)

    # in rare cases of cache inconsistency, recover and warn
    missed = set(ids) - posts_by_id.keys()
    if missed:
        log.warning("get_posts do not exist in cache: %s", repr(missed))
        lookup_sql = (
            "SELECT id, author, permlink, depth, created_at, is_deleted "
            "FROM hive_posts WHERE id = :id")
        for _id in missed:
            found = query_row(lookup_sql, id=_id)
            # the id may be absent from hive_posts as well; don't let the
            # diagnostic itself crash the request
            log.warning("missing: %s", dict(found) if found else None)

    return [posts_by_id[_id] for _id in ids if _id in posts_by_id]
def get_id_and_depth(cls, author, permlink):
    """Look up a post's id and depth by author/permlink.

    Returns whatever `query_row` yields for the matching `hive_posts` row,
    or the placeholder `(None, -1)` when that result is falsy.
    """
    sql = "SELECT id, depth FROM hive_posts WHERE author = :a AND permlink = :p"
    found = query_row(sql, a=author, p=permlink)
    if found:
        return found
    return (None, -1)
def run():
    """Entry point: set up the database if needed, sync, then follow head.

    Steps, in order: create tables when `SHOW TABLES` returns nothing;
    decide whether this is the initial sync (no row in `hive_posts_cache`);
    run the checkpoint and steemd sync passes; after an initial sync,
    build the post cache and feed cache; finally hand over to
    `listen_steemd()`.
    """
    # if tables not created, do so now
    existing_tables = query_row('SHOW TABLES')
    if not existing_tables:
        print("[INIT] No tables found. Initializing db...")
        setup()

    #TODO: if initial sync is interrupted, cache never rebuilt
    #TODO: do not build partial feed_cache during init_sync

    # if this is the initial sync, batch updates until very end
    has_cached_posts = query_one("SELECT 1 FROM hive_posts_cache LIMIT 1")
    is_initial_sync = not has_cached_posts

    if not is_initial_sync:
        # perform cleanup in case process did not exit cleanly
        cache_missing_posts()
    else:
        print("[INIT] *** Initial sync ***")

    # fast block sync strategies
    sync_from_checkpoints(is_initial_sync)
    sync_from_steemd(is_initial_sync)

    if is_initial_sync:
        print("[INIT] *** Initial sync complete. Rebuilding cache. ***")
        cache_missing_posts()
        rebuild_feed_cache()

    # initialization complete. follow head blocks
    listen_steemd()
async def get_follow_count(account: str):
    """Return follow statistics for `account` as a dict.

    The `hive_accounts` columns are aliased to `account`,
    `following_count` and `follower_count`, and the row is converted with
    `dict()`.

    NOTE(review): if `query_row` yields None for an unknown account,
    `dict(None)` raises TypeError -- confirm the intended behaviour.
    """
    row = query_row(
        " SELECT name as account, following as following_count,"
        " followers as follower_count FROM hive_accounts WHERE name = :n ",
        n=account)
    return dict(row)
def urls_to_tuples(cls, urls):
    """Resolve `author/permlink` urls to `[post_id, author, permlink]`.

    Each url is split on '/' into exactly an author and a permlink, then
    looked up in `hive_posts`. Posts flagged as deleted are skipped.

    Returns:
        list: One `[pid, author, permlink]` list per non-deleted post, in
        input order.

    Raises:
        Exception: "Post not found! ..." when a url matches no post.
            Previously a missing row failed earlier with TypeError on
            tuple unpacking, so this message was never reached.
        ValueError: If a url does not split into exactly two parts.
    """
    sql = ("SELECT id,is_deleted FROM hive_posts "
           "WHERE author = :a AND permlink = :p")
    tuples = []
    for url in urls:
        author, permlink = url.split('/')
        row = query_row(sql, a=author, p=permlink)
        # check the row before unpacking so a missing post gets the
        # intended error instead of a TypeError
        if not row or not row[0]:
            raise Exception("Post not found! {}/{}".format(author, permlink))
        pid, is_deleted = row
        if is_deleted:
            continue
        tuples.append([pid, author, permlink])
    return tuples
def is_community_post_valid(community, comment_op: dict) -> bool:
    """Check whether a new post/comment is allowed in `community`.

    Checks applied, in order:
      - an author posting in the community named after themselves is
        always valid;
      - a name with no row in `hive_communities` is not a defined
        community and is free to post in;
      - an author whose role is 'muted' is rejected;
      - in a 'restricted' community, guests cannot create top-level posts;
      - in a 'closed' community, guests cannot post or comment at all.

    Args:
        community (str): Community intended for this post.
        comment_op (dict): Raw post operation. `author` is always read;
            `parent_author` is read only for restricted communities.

    Returns:
        bool: True if all checks pass.

    Raises:
        Exception: If `community` is empty.
    """
    if not community:
        raise Exception("no community specified")

    author = comment_op['author']
    if author == community:
        return True

    sql = "SELECT * FROM hive_communities WHERE name = :name LIMIT 1"
    community_props = query_row(sql, name=community)
    if not community_props:
        # if this is not a defined community, it's free to post in.
        return True

    # look the role up once; every remaining rule depends on it
    role = get_user_role(author, community)
    if role == 'muted':
        return False

    privacy = PRIVACY_MAP[community_props['privacy']]
    if privacy == 'restricted':
        # guests cannot create top-level posts in restricted communities
        is_top_level = comment_op['parent_author'] == ""
        return not (is_top_level and role == 'guest')
    if privacy == 'closed':
        # we need at least member permissions to post or comment
        return role != 'guest'

    # 'open' (and any other mapped value) imposes no further restriction
    return True
def pids_by_replies_to_account(start_author: str, start_permlink: str = '',
                               limit: int = 20):
    """List ids of non-deleted replies to an account, newest first.

    First page: pass the replied-to account as `start_author` and leave
    `start_permlink` empty. Later pages: pass the author/permlink of the
    last reply already loaded; its parent's author is then used as the
    replied-to account and results start at that reply's creation time.
    An unknown start reply yields an empty list.
    """
    parent_account = start_author
    seek = ''

    if start_permlink:
        lookup = (" SELECT parent.author, child.created_at"
                  " FROM hive_posts child"
                  " JOIN hive_posts parent ON child.parent_id = parent.id"
                  " WHERE child.author = :author"
                  " AND child.permlink = :permlink ")
        found = query_row(lookup, author=start_author, permlink=start_permlink)
        if not found:
            return []
        parent_account = found[0]
        seek = "AND created_at <= '%s'" % found[1]

    template = (" SELECT id FROM hive_posts"
                " WHERE parent_id IN (SELECT id FROM hive_posts WHERE author = :parent)"
                " %s"
                " AND is_deleted = '0'"
                " ORDER BY created_at DESC"
                " LIMIT :limit ")
    return query_col(template % seek, parent=parent_account, limit=limit)
def _get(cls, num):
    """Load block number `num` from `hive_blocks` as a plain dict.

    The selected columns are `num`, `created_at` (aliased to `date`) and
    `hash`.
    """
    block_row = query_row(
        "SELECT num, created_at date, hash FROM hive_blocks "
        "WHERE num = :num LIMIT 1",
        num=num)
    return dict(block_row)
def _get_posts(ids, context=None): if not ids: raise Exception("no ids provided") sql = """ SELECT post_id, author, permlink, title, body, promoted, payout, created_at, payout_at, is_paidout, rshares, raw_json, category, depth, json, children, votes, author_rep, preview, img_url, is_nsfw FROM hive_posts_cache WHERE post_id IN :ids """ # key by id so we can return sorted by input order posts_by_id = {} for row in query(sql, ids=tuple(ids)).fetchall(): row = dict(row) post = {} post['post_id'] = row['post_id'] post['author'] = row['author'] post['permlink'] = row['permlink'] post['category'] = row['category'] post['parent_permlink'] = '' post['parent_author'] = '' post['title'] = row['title'] post['body'] = row['body'] post['json_metadata'] = row['json'] post['created'] = _json_date(row['created_at']) post['depth'] = row['depth'] post['children'] = row['children'] post['net_rshares'] = row['rshares'] post['cashout_time'] = '1969-12-31T23:59:59' if row[ 'is_paidout'] else _json_date(row['payout_at']) post['total_payout_value'] = ( "%.3f SBD" % row['payout']) if row['is_paidout'] else '0.000 SBD' post['curator_payout_value'] = '0.000 SBD' post['pending_payout_value'] = '0.000 SBD' if row['is_paidout'] else ( "%.3f SBD" % row['payout']) post['promoted'] = "%.3f SBD" % row['promoted'] post['replies'] = [] post['body_length'] = len(row['body']) post['active_votes'] = _hydrate_active_votes(row['votes']) post['author_reputation'] = _rep_to_raw(row['author_rep']) raw_json = {} if not row['raw_json'] else json.loads(row['raw_json']) if row['depth'] > 0: if raw_json: post['parent_permlink'] = raw_json['parent_permlink'] post['parent_author'] = raw_json['parent_author'] else: sql = "SELECT author, permlink FROM hive_posts WHERE id = (SELECT parent_id FROM hive_posts WHERE id = %d)" row2 = query_row(sql % row['post_id']) post['parent_permlink'] = row2['permlink'] post['parent_author'] = row2['author'] if raw_json: post['root_title'] = raw_json['root_title'] 
post['max_accepted_payout'] = raw_json['max_accepted_payout'] post['percent_steem_dollars'] = raw_json['percent_steem_dollars'] post['url'] = raw_json['url'] #post['net_votes'] #post['allow_replies'] #post['allow_votes'] #post['allow_curation_rewards'] #post['beneficiaries'] else: post['root_title'] = 'RE: ' + post['title'] posts_by_id[row['post_id']] = post # in rare cases of cache inconsistency, recover and warn missed = set(ids) - posts_by_id.keys() if missed: print("WARNING: get_posts do not exist in cache: {}".format(missed)) for _id in missed: ids.remove(_id) return [posts_by_id[_id] for _id in ids]
def register(cls, ops, block_date):
    """Record each new post/comment operation in `hive_posts`.

    For every op:
      - an existing, non-deleted post means the op is an edit and is
        skipped;
      - an existing but deleted post is reinstated by updating its row;
      - otherwise a new row is inserted with `block_date` as creation time.

    Top-level posts (empty `parent_author`) take their category from
    `parent_permlink` and their community from `cls._get_op_community`,
    falling back to the author. Comments inherit category and community
    from the parent row. A community that is not an existing account is
    replaced by the author.

    Top-level posts are also added to `hive_feed_cache`. The guard used to
    test `op['parent_permlink']`, which holds the category for top-level
    posts, so it did not match the "top-level" rule used above; it now
    checks `depth == 0`.

    NOTE(review): a comment whose parent row is missing fails on the tuple
    unpack below if `query_row` yields None -- confirm whether that is the
    intended hard failure.
    """
    # imported here rather than at module level, as in the original
    from hive.indexer.community import is_community_post_valid

    for op in ops:
        sql = ("SELECT id, is_deleted FROM hive_posts "
               "WHERE author = :a AND permlink = :p")
        ret = query_row(sql, a=op['author'], p=op['permlink'])
        pid = None
        if not ret:
            # post does not exist, go ahead and process it
            pass
        elif not ret[1]:
            # post exists and is not deleted, thus it's an edit. ignore.
            continue
        else:
            # post exists but was deleted. time to reinstate.
            pid = ret[0]

        # set parent & inherited attributes
        if op['parent_author'] == '':
            parent_id = None
            depth = 0
            category = op['parent_permlink']
            community = cls._get_op_community(op) or op['author']
        else:
            parent_data = query_row(
                "SELECT id, depth, category, community FROM hive_posts WHERE author = :a "
                "AND permlink = :p",
                a=op['parent_author'], p=op['parent_permlink'])
            parent_id, parent_depth, category, community = parent_data
            depth = parent_depth + 1

        # community must be an existing account
        if not Accounts.exists(community):
            community = op['author']

        # an invalid post is still stored, flagged with is_valid false
        is_valid = is_community_post_valid(community, op)
        if not is_valid:
            print("Invalid post @{}/{} in @{}".format(
                op['author'], op['permlink'], community))

        # if we're reusing a previously-deleted post (rare!), update it
        if pid:
            query(
                "UPDATE hive_posts SET is_valid = :is_valid, is_deleted = '0', parent_id = :parent_id, category = :category, community = :community, depth = :depth WHERE id = :id",
                is_valid=is_valid, parent_id=parent_id, category=category,
                community=community, depth=depth, id=pid)
        else:
            sql = (" INSERT INTO hive_posts (is_valid, parent_id, author,"
                   " permlink, category, community, depth, created_at)"
                   " VALUES (:is_valid, :parent_id, :author, :permlink,"
                   " :category, :community, :depth, :date) ")
            query(sql, is_valid=is_valid, parent_id=parent_id,
                  author=op['author'], permlink=op['permlink'],
                  category=category, community=community, depth=depth,
                  date=block_date)
            pid = query_one(
                "SELECT id FROM hive_posts WHERE author = :a AND "
                "permlink = :p", a=op['author'], p=op['permlink'])

        # add top-level posts to feed cache
        if depth == 0:
            sql = "INSERT INTO hive_feed_cache (account_id, post_id, created_at) VALUES (:account_id, :id, :created_at)"
            query(sql, account_id=Accounts.get_id(op['author']), id=pid,
                  created_at=block_date)
def get_follow_counts(account: str):
    """Fetch the `following` and `followers` columns for `account`.

    The matching `hive_accounts` row is converted with `dict()`.
    """
    counts_row = query_row(
        "SELECT following, followers FROM hive_accounts WHERE name = :account",
        account=account)
    return dict(counts_row)
async def get_follow_count(account: str):
    """Return the name, following and followers columns for `account`.

    The matching `hive_accounts` row is converted with `dict()`.
    """
    account_row = query_row(
        "SELECT name, following, followers FROM hive_accounts WHERE name = :n",
        n=account)
    return dict(account_row)
def get_community(community_name):
    """Fetch the `hive_communities` row named `community_name`.

    Returns whatever `query_row` yields for that lookup, unchanged.
    """
    sql = "SELECT * FROM hive_communities WHERE name = :n LIMIT 1"
    return query_row(sql, n=community_name)
def get(cls, num):
    """Fetch block number `num` from `hive_blocks`.

    Selects `num`, `created_at` (aliased to `date`) and `hash`, and
    returns the `query_row` result unchanged.
    """
    return query_row(
        "SELECT num, created_at date, hash FROM hive_blocks "
        "WHERE num = :num LIMIT 1",
        num=num)
def last(cls):
    """Fetch the highest-numbered block stored in `hive_blocks`.

    Selects `num`, `created_at` (aliased to `date`) and `hash`, and
    returns the `query_row` result unchanged.
    """
    newest_block_sql = ("SELECT num, created_at date, hash FROM hive_blocks "
                        "ORDER BY num DESC LIMIT 1")
    return query_row(newest_block_sql)