def reset_autovac():
    """Initializes per-table autovacuum/autoanalyze params"""

    # consider using scale_factor = 0 with flat thresholds:
    #   autovacuum_vacuum_threshold, autovacuum_analyze_threshold

    autovac_config = { #    vacuum   analyze
        # default
        'hive_accounts':    (0.2,    0.1),
        'hive_state':       (0.2,    0.1),
        'hive_reblogs':     (0.2,    0.1),
        'hive_payments':    (0.2,    0.1),
        # more aggressive
        'hive_posts':       (0.010,  0.005),
        'hive_post_tags':   (0.010,  0.005),
        'hive_feed_cache':  (0.010,  0.005),
        # very aggressive
        'hive_posts_cache': (0.0050, 0.0025), # @36M, ~2/day, 3/day (~240k new tuples daily)
        'hive_blocks':      (0.0100, 0.0014), # @20M, ~1/week, 1/day
        'hive_follows':     (0.0050, 0.0025), # @47M, ~1/day, 3/day (~300k new tuples daily)
    }

    for table, (vacuum_sf, analyze_sf) in autovac_config.items():
        sql = """ALTER TABLE %s SET (autovacuum_vacuum_scale_factor = %s,
                                     autovacuum_analyze_scale_factor = %s)"""
        Db.instance().query(sql % (table, vacuum_sf, analyze_sf))
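# The comment above floats an alternative: with scale_factor = 0, autovacuum
# triggers on a flat dead-tuple count instead of a percentage, so very large
# tables stop waiting longer and longer between vacuums as they grow. A
# minimal sketch of that approach; the function name and threshold values
# below are illustrative assumptions, not tuned settings from the original.
def reset_autovac_flat():
    flat_config = { #       vacuum  analyze (flat tuple counts, assumed)
        'hive_follows':     (500000, 100000),
        'hive_posts_cache': (250000, 50000),
    }
    for table, (vacuum_n, analyze_n) in flat_config.items():
        sql = """ALTER TABLE %s SET (autovacuum_vacuum_scale_factor = 0,
                                     autovacuum_vacuum_threshold = %s,
                                     autovacuum_analyze_scale_factor = 0,
                                     autovacuum_analyze_threshold = %s)"""
        Db.instance().query(sql % (table, vacuum_n, analyze_n))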
def run():
    """Run the proper routine as indicated by hive --mode argument."""
    conf = Conf.init_argparse()
    Db.set_shared_instance(conf.db())
    mode = conf.mode()

    if mode == 'server':
        from hive.server.serve import run_server
        run_server(conf=conf)

    elif mode == 'sync':
        from hive.indexer.sync import Sync
        Sync(conf=conf).run()

    elif mode == 'status':
        from hive.db.db_state import DbState
        print(DbState.status())

    #elif mode == 'sync-profile':
    #    from hive.indexer.sync import Sync
    #    from hive.utils.profiler import Profiler
    #    with Profiler():
    #        Sync(conf=conf).run()

    else:
        raise Exception("unknown run mode %s" % mode)
def db(self):
    """Get a configured instance of Db."""
    if self._db is None:
        url = self.get('database_url')
        enable_autoexplain = self.get('log_explain_queries')
        assert url, ('--database-url (or DATABASE_URL env) not specified; '
                     'e.g. postgresql://user:pass@localhost:5432/hive')
        self._db = Db(url, "root db creation", enable_autoexplain)
        log.info("Root database connection created...")
    return self._db
def run():
    """Run the service specified in the `--mode` argument."""
    conf = Conf.init_argparse()
    Db.set_shared_instance(conf.db())
    mode = conf.mode()

    if conf.get('test_profile'):
        from hive.utils.profiler import Profiler
        with Profiler():
            launch_mode(mode, conf)
    else:
        launch_mode(mode, conf)
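# `launch_mode` is not defined in this section; below is a minimal sketch of
# the dispatcher it presumably is, modeled on the mode handling in the run()
# variant shown earlier. Treat it as an assumption, not the verbatim
# implementation.
def launch_mode(mode, conf):
    """Run the routine for the given runmode string (sketch)."""
    if mode == 'server':
        from hive.server.serve import run_server
        run_server(conf=conf)
    elif mode == 'sync':
        from hive.indexer.sync import Sync
        Sync(conf=conf).run()
    elif mode == 'status':
        from hive.db.db_state import DbState
        print(DbState.status())
    else:
        raise Exception("unknown run mode %s" % mode)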
def db(self):
    """Get a configured instance of Db."""
    if not self._db:
        url = self.get('database_url')
        assert url, ('--database-url (or DATABASE_URL env) not specified; '
                     'e.g. postgresql://user:pass@localhost:5432/hive')
        self._db = Db(url)
    return self._db
def setup():
    """Creates all tables, tunes autovacuum, and inserts bootstrap rows."""
    # initialize schema
    engine = Db.create_engine(echo=False)
    build_metadata().create_all(engine)

    # tune auto vacuum/analyze
    reset_autovac()

    # default rows
    sqls = [
        "INSERT INTO hive_state (block_num, db_version, steem_per_mvest, usd_per_steem, sbd_per_steem, dgpo) VALUES (0, 3, 0, 0, 0, '')",
        "INSERT INTO hive_blocks (num, hash, created_at) VALUES (0, '0000000000000000000000000000000000000000', '2016-03-24 16:04:57')",
        "INSERT INTO hive_accounts (name, created_at) VALUES ('miners',    '2016-03-24 16:05:00')",
        "INSERT INTO hive_accounts (name, created_at) VALUES ('null',      '2016-03-24 16:05:00')",
        "INSERT INTO hive_accounts (name, created_at) VALUES ('temp',      '2016-03-24 16:05:00')",
        "INSERT INTO hive_accounts (name, created_at) VALUES ('initminer', '2016-03-24 16:05:00')",
    ]
    for sql in sqls:
        Db.instance().query(sql)
def run():
    """Run the proper routine as indicated by hive --mode argument."""
    conf = Conf.init_argparse()
    Db.set_shared_instance(conf.db())
    mode = '/'.join(conf.get('mode'))

    if mode == 'server':
        from hive.server.serve import run_server
        run_server(conf=conf)

    elif mode == 'sync':
        from hive.indexer.sync import Sync
        Sync(conf=conf).run()

    elif mode == 'status':
        from hive.db.db_state import DbState
        print(DbState.status())

    else:
        raise Exception("unknown run mode %s" % mode)
class Databases:
    """Holds the dedicated hived-db connections used by MassiveBlocksProvider."""

    def __init__(self, conf):
        self._db_root = Db(conf.get('hived_database_url'),
                           "MassiveBlocksProvider.Root",
                           conf.get('log_explain_queries'))
        self._db_operations = Db(conf.get('hived_database_url'),
                                 "MassiveBlocksProvider.OperationsData",
                                 conf.get('log_explain_queries'))
        self._db_blocks_data = Db(conf.get('hived_database_url'),
                                  "MassiveBlocksProvider.BlocksData",
                                  conf.get('log_explain_queries'))

        assert self._db_root
        assert self._db_operations
        assert self._db_blocks_data

    def close(self):
        self._db_root.close()
        self._db_operations.close()
        self._db_blocks_data.close()

    def get_root(self):
        return self._db_root

    def get_operations(self):
        return self._db_operations

    def get_blocks_data(self):
        return self._db_blocks_data
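# Hypothetical usage sketch; `conf` is assumed to be an initialized Conf with
# --hived-database-url set. Three separate connections to the same database
# let root, operations, and blocks-data queries be issued concurrently.
dbs = Databases(conf)
try:
    blocks_db = dbs.get_blocks_data()   # dedicated connection for block rows
    ops_db = dbs.get_operations()       # dedicated connection for operations
    # ... issue queries on each connection from its own worker ...
finally:
    dbs.close()                         # closes all three connections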
import logging
import collections

from hive.db.adapter import Db
from hive.db.db_state import DbState
from hive.utils.normalize import load_json_key
from hive.indexer.accounts import Accounts
from hive.indexer.cached_post import CachedPost
from hive.indexer.feed_cache import FeedCache
from hive.community.roles import is_community_post_valid

log = logging.getLogger(__name__)

DB = Db.instance()

class Posts:
    """Handles critical/core post ops and data."""

    # LRU cache for (author-permlink -> id) lookup (~400mb per 1M entries)
    CACHE_SIZE = 2000000
    _ids = collections.OrderedDict()
    _hits = 0
    _miss = 0

    @classmethod
    def last_id(cls):
        """Get the last indexed post id."""
        sql = "SELECT MAX(id) FROM hive_posts WHERE is_deleted = '0'"
        return DB.query_one(sql) or 0
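# The OrderedDict above is the basis of an LRU cache. A minimal sketch of a
# lookup method, shown as it would appear inside the Posts class; the
# `_get_id` name, the SQL, and the named-parameter form of DB.query_one are
# assumptions for illustration, not the original implementation.
    @classmethod
    def _get_id(cls, url):
        """Resolve an 'author/permlink' url to a post id via the LRU cache."""
        if url in cls._ids:
            cls._hits += 1
            cls._ids.move_to_end(url)           # mark as most recently used
            return cls._ids[url]
        cls._miss += 1
        author, permlink = url.split('/')
        sql = "SELECT id FROM hive_posts WHERE author = :a AND permlink = :p"
        _id = DB.query_one(sql, a=author, p=permlink)
        if _id:
            if len(cls._ids) >= cls.CACHE_SIZE:
                cls._ids.popitem(last=False)    # evict the least recently used
            cls._ids[url] = _id
        return _id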
"""Hive server and API tests.""" from hive.conf import Conf from hive.db.adapter import Db Db.set_shared_instance(Conf.init_test().db())
def db(cls):
    """Get a db adapter instance."""
    if not cls._db:
        cls._db = Db.instance()
    return cls._db
def teardown():
    """Drop all tables"""
    engine = Db.create_engine(echo=True)
    metadata = build_metadata()
    metadata.drop_all(engine)
class Conf():
    """ Manages sync/server configuration via args, ENVs, and hive.conf. """

    def __init__(self):
        self._args = None
        self._env = None
        self._db = None
        self._steem = None
        self.arguments = None

    def init_argparse(self, strict=True, **kwargs):
        """Read hive config (CLI arg > ENV var > config)"""

        #pylint: disable=line-too-long
        parser = configargparse.get_arg_parser(
            default_config_files=['./hive.conf'], **kwargs)
        add = parser.add

        # runmodes: sync, server, status
        add('mode', nargs='*', default=['sync'])

        # common
        add('--database-url', env_var='DATABASE_URL', required=False, help='database connection url', default='')
        add('--steemd-url', env_var='STEEMD_URL', required=False, help='steemd/jussi endpoint', default='{"default" : "https://api.hive.blog"}')
        add('--muted-accounts-url', env_var='MUTED_ACCOUNTS_URL', required=False, help='url to flat list of muted accounts', default='https://raw.githubusercontent.com/hivevectordefense/irredeemables/master/full.txt')
        add('--blacklist-api-url', env_var='BLACKLIST_API_URL', required=False, help='url to access blacklist api', default='https://blacklist.usehive.com')

        # server
        add('--http-server-port', type=int, env_var='HTTP_SERVER_PORT', default=8080)
        add('--prometheus-port', type=int, env_var='PROMETHEUS_PORT', required=False, help='if specified, runs a prometheus daemon on the given port, which provides statistics and performance data')

        # sync
        add('--max-workers', type=int, env_var='MAX_WORKERS', help='max workers for batch requests', default=6)
        add('--max-batch', type=int, env_var='MAX_BATCH', help='max chunk size for batch requests', default=35)
        add('--max-retries', type=int, env_var='MAX_RETRIES', help='max number of retries after request failure is accepted; default -1 means no limit', default=-1)
        add('--trail-blocks', type=int, env_var='TRAIL_BLOCKS', help='number of blocks to trail head by', default=2)
        add('--sync-to-s3', type=strtobool, env_var='SYNC_TO_S3', help='alternative healthcheck for background sync service', default=False)
        add('--hived-database-url', env_var='HIVED_DATABASE_URL', required=False, help='Hived blocks database connection url', default='')

        # test/debug
        add('--log-level', env_var='LOG_LEVEL', default='INFO')
        add('--test-disable-sync', type=strtobool, env_var='TEST_DISABLE_SYNC', help='(debug) skip sync and sweep; jump to block streaming', default=False)
        add('--test-max-block', type=int, env_var='TEST_MAX_BLOCK', help='(debug) only sync to given block, for running sync test', default=None)
        add('--test-skip-ais-phase', env_var='TEST_SKIP_AIS_PHASE', help='(debug) skip the After-Initial-Sync phase; useful to go into live sync or to exit if TEST_MAX_BLOCK is used', action='store_true')
        add('--test-profile', type=strtobool, env_var='TEST_PROFILE', help='(debug) profile execution', default=False)
        add('--log-request-times', env_var='LOG_REQUEST_TIMES', help='(debug) generate a log containing request processing times', action='store_true')
        add('--log-virtual-op-calls', env_var='LOG_VIRTUAL_OP_CALLS', help='(debug) log virtual op calls and responses', default=False)
        add('--mock-block-data-path', type=str, nargs='+', env_var='MOCK_BLOCK_DATA_PATH', help='(debug/testing) load additional data from block data file')
        add('--mock-vops-data-path', type=str, env_var='MOCK_VOPS_DATA_PATH', help='(debug/testing) load additional data from virtual operations data file')
        add('--community-start-block', type=int, env_var='COMMUNITY_START_BLOCK', default=37500000)
        add('--log_explain_queries', type=strtobool, env_var='LOG_EXPLAIN_QUERIES', help='(debug) log EXPLAIN ANALYZE output for specific queries; requires a db superuser', default=False)

        # logging
        add('--log-timestamp', help='Output timestamp in log', action='store_true')
        add('--log-epoch', help='Output unix epoch in log', action='store_true')
        add('--log-mask-sensitive-data', help='Mask sensitive data, e.g. passwords', action='store_true')

        add('--pid-file', type=str, env_var='PID_FILE', help='Dump the current process pid into the specified file', default=None)

        add('--auto-http-server-port', nargs='+', type=int, help='Hivemind will listen on the first available port from this range')

        # needed for e.g. tests - other args may be present
        args = (parser.parse_args() if strict else parser.parse_known_args()[0])
        self._args = vars(args)
        self.arguments = parser._actions

        # configure logger and print config
        root = logging.getLogger()
        root.setLevel(self.log_level())

        try:
            if 'auto_http_server_port' in vars(args) and vars(args)['auto_http_server_port'] is not None:
                port_range = vars(args)['auto_http_server_port']
                port_range_len = len(port_range)
                if port_range_len == 0 or port_range_len > 2:
                    raise ValueError("auto-http-server-port expects one or two values")
                if port_range_len == 2 and port_range[0] > port_range[1]:
                    raise ValueError("port min value is greater than port max value")
        except Exception as ex:
            root.error("Value error: {}".format(ex))
            exit(1)

        # Print command line args, but on a continuous integration server
        # hide the db connection string.
        from sys import argv
        if self.get('log_mask_sensitive_data'):
            my_args = []
            upcoming_connection_string = False
            for elem in argv[1:]:
                if upcoming_connection_string:
                    upcoming_connection_string = False
                    my_args.append('MASKED')
                    continue
                if elem == '--database-url':
                    upcoming_connection_string = True
                my_args.append(elem)
            root.info("Used command line args: %s", " ".join(my_args))
        else:
            root.info("Used command line args: %s", " ".join(argv[1:]))

        # uncomment for full list of program args
        #args_list = ["--" + k + " " + str(v) for k, v in vars(args).items()]
        #root.info("Full command line args: %s", " ".join(args_list))

        if self.mode() == 'server':
            #DbStats.SLOW_QUERY_MS = 750
            DbStats.SLOW_QUERY_MS = 200 # TODO

    def __enter__(self):
        return self

    def __exit__(self, exc_type, value, traceback):
        self.disconnect()

    def args(self):
        """Get the raw Namespace object as generated by configargparse"""
        return self._args

    def steem(self):
        """Get a SteemClient instance, lazily initialized"""
        if not self._steem:
            from json import loads
            self._steem = SteemClient(
                url=loads(self.get('steemd_url')),
                max_batch=self.get('max_batch'),
                max_workers=self.get('max_workers'),
                max_retries=self.get('max_retries'))
        return self._steem

    def db(self):
        """Get a configured instance of Db."""
        if self._db is None:
            url = self.get('database_url')
            enable_autoexplain = self.get('log_explain_queries')
            assert url, ('--database-url (or DATABASE_URL env) not specified; '
                         'e.g. postgresql://user:pass@localhost:5432/hive')
            self._db = Db(url, "root db creation", enable_autoexplain)
            log.info("Root database connection created...")
        return self._db

    def get(self, param):
        """Reads a single property, e.g. `database_url`."""
        assert self._args, "run init_argparse()"
        return self._args[param]

    def mode(self):
        """Get the CLI runmode.

        - `server`: API server
        - `sync`: db sync process
        - `status`: status info dump
        """
        return '/'.join(self.get('mode'))

    def log_level(self):
        """Get `logger`'s internal int level from config string."""
        return int_log_level(self.get('log_level'))

    def pid_file(self):
        """Get the optional pid_file name to put the current process pid in"""
        return self._args.get("pid_file", None)

    def generate_completion(self):
        arguments = []
        for arg in self.arguments:
            arguments.extend(arg.option_strings)
        arguments = " ".join(arguments)
        with open('hive-completion.bash', 'w') as file:
            file.writelines([
                "#!/bin/bash\n",
                "# to run, type: source hive-completion.bash\n\n",
                "# if you want completion everywhere, execute these commands:\n",
                "# ln $PWD/hive-completion.bash $HOME/.local/\n",
                '# echo "source $HOME/.local/hive-completion.bash" >> $HOME/.bashrc\n',
                "# source $HOME/.bashrc\n\n",
                f'complete -f -W "{arguments}" hive\n',
                "\n",
            ])

    def disconnect(self):
        if self._db is not None:
            self._db.close()
            self._db.close_engine()
            self._db = None
            log.info("Database disconnected...")
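# Since Conf implements __enter__/__exit__, it can serve as a context manager
# that guarantees disconnect() runs. A minimal usage sketch, assuming
# --database-url (or the DATABASE_URL env var) is provided:
with Conf() as conf:
    conf.init_argparse()
    Db.set_shared_instance(conf.db())
    # ... run the selected mode here ...
# on exit, disconnect() closes the db connection and disposes the engine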