def __init__(self, conf, logger):
    """Set up the Timescale-backed listen store.

    Args:
        conf: mapping with connection settings — reads
            SQLALCHEMY_TIMESCALE_URI, REDIS_HOST, REDIS_PORT,
            REDIS_NAMESPACE and optionally LISTEN_DUMP_TEMP_DIR_ROOT.
        logger: logger passed through to the parent listen store.
    """
    super(TimescaleListenStore, self).__init__(logger)
    timescale.init_db_connection(conf['SQLALCHEMY_TIMESCALE_URI'])

    # Initialize brainzutils cache
    self.ns = conf['REDIS_NAMESPACE']
    init_cache(host=conf['REDIS_HOST'], port=conf['REDIS_PORT'],
               namespace=conf['REDIS_NAMESPACE'])

    # BUG FIX: conf.get('...', tempfile.mkdtemp()) evaluated mkdtemp()
    # eagerly, creating (and leaking) an empty temp directory on every
    # instantiation even when the config key was set. Only create a temp
    # directory when no dump root is configured.
    if 'LISTEN_DUMP_TEMP_DIR_ROOT' in conf:
        self.dump_temp_dir_root = conf['LISTEN_DUMP_TEMP_DIR_ROOT']
    else:
        self.dump_temp_dir_root = tempfile.mkdtemp()
def refresh_listen_count_aggregate():
    """Manually refresh the listen_count_30day continuous aggregate.

    Walks backwards through time in chunks of
    NUM_YEARS_TO_PROCESS_FOR_CONTINUOUS_AGGREGATE_REFRESH years, starting
    one year in the past, refreshing each chunk until data older than
    DATA_START_YEAR_IN_SECONDS has been covered.

    Raises:
        psycopg2.OperationalError: if the database cannot be reached; the
            error is logged and re-raised.
    """
    timescale.init_db_connection(config.SQLALCHEMY_TIMESCALE_URI)

    end_ts = int(datetime.now().timestamp()) - SECONDS_IN_A_YEAR
    start_ts = end_ts - \
        (NUM_YEARS_TO_PROCESS_FOR_CONTINUOUS_AGGREGATE_REFRESH * SECONDS_IN_A_YEAR) + 1

    while True:
        query = "call refresh_continuous_aggregate('listen_count_30day', :start_ts, :end_ts)"
        t0 = time.monotonic()
        try:
            with timescale.engine.connect() as connection:
                # refresh_continuous_aggregate cannot run inside a
                # transaction block, so switch to autocommit (level 0).
                connection.connection.set_isolation_level(0)
                connection.execute(sqlalchemy.text(query), {
                    "start_ts": start_ts,
                    "end_ts": end_ts
                })
        except psycopg2.OperationalError as e:
            # BUG FIX: this is a module-level function, so the original
            # self.log.error raised a NameError; use the module logger.
            logger.error("Cannot refresh listen_count_30day cont agg: %s" % str(e), exc_info=True)
            raise

        t1 = time.monotonic()
        logger.info("Refreshed continuous aggregate for: %s to %s in %.2fs" % (str(
            datetime.fromtimestamp(start_ts)), str(datetime.fromtimestamp(end_ts)), t1-t0))

        # Step the window back one chunk; stop once we pass the start of data.
        end_ts -= (NUM_YEARS_TO_PROCESS_FOR_CONTINUOUS_AGGREGATE_REFRESH * SECONDS_IN_A_YEAR)
        start_ts -= (NUM_YEARS_TO_PROCESS_FOR_CONTINUOUS_AGGREGATE_REFRESH * SECONDS_IN_A_YEAR)
        if end_ts < DATA_START_YEAR_IN_SECONDS:
            break
def reset_timescale_db(self):
    """Drop and fully recreate the Timescale database schema.

    Runs the same SQL scripts as the production setup, in three phases,
    disposing the engine between connections to different URIs.
    """
    # Phase 1: drop and recreate the database itself (admin connection).
    ts.init_db_connection(config.TIMESCALE_ADMIN_URI)
    for script in ('drop_db.sql', 'create_db.sql'):
        ts.run_sql_script_without_transaction(
            os.path.join(TIMESCALE_SQL_DIR, script))
    ts.engine.dispose()

    # Phase 2: install extensions (admin connection to the LB database).
    ts.init_db_connection(config.TIMESCALE_ADMIN_LB_URI)
    ts.run_sql_script_without_transaction(
        os.path.join(TIMESCALE_SQL_DIR, 'create_extensions.sql'))
    ts.engine.dispose()

    # Phase 3: build schemas, types, tables, functions, views, indexes and
    # keys as the application user. Order matters: later scripts depend on
    # objects created by earlier ones.
    ts.init_db_connection(config.SQLALCHEMY_TIMESCALE_URI)
    for script in ('create_schemas.sql', 'create_types.sql',
                   'create_tables.sql', 'create_functions.sql'):
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR, script))
    # Views must be created outside a transaction.
    ts.run_sql_script_without_transaction(
        os.path.join(TIMESCALE_SQL_DIR, 'create_views.sql'))
    ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR, 'create_indexes.sql'))
    ts.create_view_indexes()
    for script in ('create_primary_keys.sql', 'create_foreign_keys.sql'):
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR, script))
    ts.engine.dispose()
def init_db(force, create_db):
    """Initializes database.

    This process involves several steps:
    1. Table structure is created.
    2. Indexes are created.
    3. Views are created
    """
    from listenbrainz import config

    def _sql(name):
        # Resolve a script name to its full path under TIMESCALE_SQL_DIR.
        return os.path.join(TIMESCALE_SQL_DIR, name)

    ts.init_db_connection(config.TIMESCALE_ADMIN_URI)
    if force:
        res = ts.run_sql_script_without_transaction(_sql('drop_db.sql'))
        if not res:
            raise Exception(
                'Failed to drop existing database and user! Exit code: %i' % res)

    if create_db or force:
        print('TS: Creating user and a database...')
        res = ts.run_sql_script_without_transaction(_sql('create_db.sql'))
        if not res:
            raise Exception(
                'Failed to create new database and user! Exit code: %i' % res)

        ts.init_db_connection(config.TIMESCALE_ADMIN_LB_URI)
        print('TS: Creating database extensions...')
        res = ts.run_sql_script_without_transaction(_sql('create_extensions.sql'))
        # Don't raise an exception if the extension already exists

    ts.init_db_connection(config.SQLALCHEMY_TIMESCALE_URI)

    application = webserver.create_app()
    with application.app_context():
        # Run the schema-building scripts in dependency order.
        for message, script in (
                ('TS: Creating tables...', 'create_tables.sql'),
                ('TS: Creating Functions...', 'create_functions.sql'),
                ('TS: Creating views...', 'create_views.sql'),
                ('TS: Creating indexes...', 'create_indexes.sql')):
            print(message)
            ts.run_sql_script(_sql(script))

        print("Done!")
def gen_app(config_path=None, debug=None):
    """ Generate a Flask app for LB with all configurations done and connections established.

    In the Flask app returned, blueprints are not registered.
    """
    # NOTE(review): config_path is accepted but not used in this body —
    # presumably consumed by load_config via app config elsewhere; confirm.
    app = CustomFlask(
        import_name=__name__,
        use_flask_uuid=True,
    )

    load_config(app)
    if debug is not None:
        app.debug = debug

    # initialize Flask-DebugToolbar if the debug option is True
    if app.debug and app.config['SECRET_KEY']:
        app.init_debug_toolbar()

    # Logging
    app.init_loggers(file_config=app.config.get('LOG_FILE'),
                     email_config=app.config.get('LOG_EMAIL'),
                     sentry_config=app.config.get('LOG_SENTRY'))

    # Redis connection
    create_redis(app)

    # Timescale connection
    create_timescale(app)

    # RabbitMQ connection: a missing broker is logged as critical but does
    # not abort app creation.
    try:
        create_rabbitmq(app)
    except ConnectionError:
        app.logger.critical("RabbitMQ service is not up!", exc_info=True)

    # Database connections (imported locally to avoid import-time cycles)
    from listenbrainz import db
    from listenbrainz.db import timescale as ts
    db.init_db_connection(app.config['SQLALCHEMY_DATABASE_URI'])
    ts.init_db_connection(app.config['SQLALCHEMY_TIMESCALE_URI'])
    from listenbrainz.webserver.external import messybrainz
    messybrainz.init_db_connection(
        app.config['MESSYBRAINZ_SQLALCHEMY_DATABASE_URI'])

    # MusicBrainz DB is optional; only wired up when a URI is configured.
    if app.config['MB_DATABASE_URI']:
        from brainzutils import musicbrainz_db
        musicbrainz_db.init_db_engine(app.config['MB_DATABASE_URI'])

    # OAuth
    from listenbrainz.webserver.login import login_manager, provider
    login_manager.init_app(app)
    provider.init(app.config['MUSICBRAINZ_CLIENT_ID'],
                  app.config['MUSICBRAINZ_CLIENT_SECRET'])

    # Error handling
    from listenbrainz.webserver.errors import init_error_handlers
    init_error_handlers(app)

    from listenbrainz.webserver import rate_limiter

    # Attach rate-limit headers to every outgoing response.
    @app.after_request
    def after_request_callbacks(response):
        return rate_limiter.inject_x_rate_headers(response)

    # Template utilities
    app.jinja_env.add_extension('jinja2.ext.do')
    from listenbrainz.webserver import utils
    app.jinja_env.filters['date'] = utils.reformat_date
    app.jinja_env.filters['datetime'] = utils.reformat_datetime

    return app
def init_ts_db(force, create_db):
    """Initializes database.

    This process involves several steps:
    1. Table structure is created.
    2. Indexes are created.
    3. Views are created
    """
    from listenbrainz import config
    ts.init_db_connection(config.TIMESCALE_ADMIN_URI)
    if force:
        res = ts.run_sql_script_without_transaction(
            os.path.join(TIMESCALE_SQL_DIR, 'drop_db.sql'))
        if not res:
            raise Exception(
                'Failed to drop existing database and user! Exit code: %i' % res)

    if create_db or force:
        print('TS: Creating user and a database...')
        retries = 0
        # Retry the database creation: concurrent connections to template1
        # can make CREATE DATABASE fail transiently; give up after 5 tries.
        while True:
            try:
                res = ts.run_sql_script_without_transaction(
                    os.path.join(TIMESCALE_SQL_DIR, 'create_db.sql'))
                break
            except sqlalchemy.exc.OperationalError:
                print(
                    "Trapped template1 access error, FFS! Sleeping, trying again."
                )
                retries += 1
                if retries == 5:
                    raise
                sleep(1)
                continue

        if not res:
            raise Exception(
                'Failed to create new database and user! Exit code: %i' % res)

        ts.init_db_connection(config.TIMESCALE_ADMIN_LB_URI)
        print('TS: Creating database extensions...')
        res = ts.run_sql_script_without_transaction(
            os.path.join(TIMESCALE_SQL_DIR, 'create_extensions.sql'))
        # Don't raise an exception if the extension already exists

    ts.init_db_connection(config.SQLALCHEMY_TIMESCALE_URI)

    application = webserver.create_app()
    with application.app_context():
        # Schema-building scripts run in dependency order.
        print('TS: Creating Schemas...')
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR, 'create_schemas.sql'))
        print('TS: Creating Types...')
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR, 'create_types.sql'))
        print('TS: Creating tables...')
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR, 'create_tables.sql'))
        print('TS: Creating Functions...')
        ts.run_sql_script(
            os.path.join(TIMESCALE_SQL_DIR, 'create_functions.sql'))
        print('TS: Creating views...')
        # Views are created without a transaction, unlike the other scripts.
        ts.run_sql_script_without_transaction(
            os.path.join(TIMESCALE_SQL_DIR, 'create_views.sql'))
        print('TS: Creating indexes...')
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR, 'create_indexes.sql'))
        ts.create_view_indexes()
        print('TS: Creating Primary and Foreign Keys...')
        ts.run_sql_script(
            os.path.join(TIMESCALE_SQL_DIR, 'create_primary_keys.sql'))
        ts.run_sql_script(
            os.path.join(TIMESCALE_SQL_DIR, 'create_foreign_keys.sql'))
        print("Done!")
def create_app(debug=None):
    """ Generate a Flask app for LB with all configurations done and connections established.

    In the Flask app returned, blueprints are not registered.
    """
    app = CustomFlask(
        import_name=__name__,
        use_flask_uuid=True,
    )

    load_config(app)
    if debug is not None:
        app.debug = debug

    # As early as possible, if debug is True, set the log level of our 'listenbrainz' logger to DEBUG
    # to prevent flask from creating a new log handler
    if app.debug:
        logger = logging.getLogger('listenbrainz')
        logger.setLevel(logging.DEBUG)

    # initialize Flask-DebugToolbar if the debug option is True
    if app.debug and app.config['SECRET_KEY']:
        app.init_debug_toolbar()

    # Sentry error reporting is optional — only enabled when configured.
    sentry_config = app.config.get('LOG_SENTRY')
    if sentry_config:
        sentry.init_sentry(**sentry_config)

    # Initialize BU cache and metrics
    cache.init(host=app.config['REDIS_HOST'],
               port=app.config['REDIS_PORT'],
               namespace=app.config['REDIS_NAMESPACE'])
    metrics.init("listenbrainz")

    # Database connections (imported locally to avoid import-time cycles)
    from listenbrainz import db
    from listenbrainz.db import timescale as ts
    from listenbrainz import messybrainz as msb
    db.init_db_connection(app.config['SQLALCHEMY_DATABASE_URI'])
    ts.init_db_connection(app.config['SQLALCHEMY_TIMESCALE_URI'])
    msb.init_db_connection(app.config['MESSYBRAINZ_SQLALCHEMY_DATABASE_URI'])

    # Redis connection
    from listenbrainz.webserver.redis_connection import init_redis_connection
    init_redis_connection(app.logger)

    # Timescale connection
    from listenbrainz.webserver.timescale_connection import init_timescale_connection
    init_timescale_connection(app)

    # RabbitMQ connection: a missing broker is logged as critical but does
    # not abort app creation.
    from listenbrainz.webserver.rabbitmq_connection import init_rabbitmq_connection
    try:
        init_rabbitmq_connection(app)
    except ConnectionError:
        app.logger.critical("RabbitMQ service is not up!", exc_info=True)

    # MusicBrainz DB is optional; only wired up when a URI is configured.
    if app.config['MB_DATABASE_URI']:
        from brainzutils import musicbrainz_db
        musicbrainz_db.init_db_engine(app.config['MB_DATABASE_URI'])

    # OAuth
    from listenbrainz.webserver.login import login_manager, provider
    login_manager.init_app(app)
    provider.init(app.config['MUSICBRAINZ_CLIENT_ID'],
                  app.config['MUSICBRAINZ_CLIENT_SECRET'])

    # Error handling
    from listenbrainz.webserver.errors import init_error_handlers
    init_error_handlers(app)

    # Rate limiting: whitelisted tokens bypass limits; headers are injected
    # into every response.
    from brainzutils.ratelimit import inject_x_rate_headers, set_user_validation_function
    set_user_validation_function(check_ratelimit_token_whitelist)

    @app.after_request
    def after_request_callbacks(response):
        return inject_x_rate_headers(response)

    # Template utilities
    app.jinja_env.add_extension('jinja2.ext.do')
    from listenbrainz.webserver import utils
    app.jinja_env.filters['date'] = utils.reformat_date
    app.jinja_env.filters['datetime'] = utils.reformat_datetime

    return app
def recalculate_all_user_data():
    """Recompute cached listen counts and min/max listen timestamps for all users.

    Scans the whole listen table (up to the latest `created` timestamp seen
    in the last 4 weeks), then resets and repopulates the per-user
    REDIS_USER_LISTEN_COUNT and REDIS_USER_TIMESTAMPS cache entries.

    Raises:
        psycopg2.OperationalError: if either database cannot be queried;
            the error is logged and re-raised.
    """
    timescale.init_db_connection(config.SQLALCHEMY_TIMESCALE_URI)
    db.init_db_connection(config.SQLALCHEMY_DATABASE_URI)
    init_cache(host=config.REDIS_HOST, port=config.REDIS_PORT,
               namespace=config.REDIS_NAMESPACE)

    # Find the created timestamp of the last listen
    query = "SELECT max(created) FROM listen WHERE created > :date"
    try:
        with timescale.engine.connect() as connection:
            result = connection.execute(sqlalchemy.text(query),
                                        date=datetime.now() - timedelta(weeks=4))
            row = result.fetchone()
            last_created_ts = row[0]
    except psycopg2.OperationalError as e:
        # BUG FIX: the original format string had no %s placeholder, so the
        # % operator raised TypeError instead of logging the real error.
        logger.error("Cannot query ts to fetch latest listen: %s" % str(e), exc_info=True)
        raise

    logger.info("Last created timestamp: " + str(last_created_ts))

    # Select a list of users
    user_list = []
    query = 'SELECT musicbrainz_id FROM "user"'
    try:
        with db.engine.connect() as connection:
            result = connection.execute(sqlalchemy.text(query))
            for row in result:
                user_list.append(row[0])
    except psycopg2.OperationalError as e:
        # BUG FIX: missing %s placeholder (see above).
        logger.error("Cannot query db to fetch user list: %s" % str(e), exc_info=True)
        raise

    logger.info("Fetched %d users. Setting empty cache entries." % len(user_list))

    # Reset the timestamps and listen counts to 0 for all users
    # (the original issued the listen-count set twice; once is enough)
    for user_name in user_list:
        cache.set(REDIS_USER_LISTEN_COUNT + user_name, 0, expirein=0, encode=False)
        cache.set(REDIS_USER_TIMESTAMPS + user_name, "0,0", expirein=0)

    # Tabulate all of the listen counts/timestamps for all users
    logger.info("Scan the whole listen table...")
    listen_counts = defaultdict(int)
    user_timestamps = {}
    query = "SELECT listened_at, user_name FROM listen where created <= :ts"
    try:
        with timescale.engine.connect() as connection:
            result = connection.execute(sqlalchemy.text(query), ts=last_created_ts)
            for row in result:
                ts = row[0]
                user_name = row[1]
                # Track [min, max] listened_at per user in a single pass.
                if user_name not in user_timestamps:
                    user_timestamps[user_name] = [ts, ts]
                else:
                    if ts > user_timestamps[user_name][1]:
                        user_timestamps[user_name][1] = ts
                    if ts < user_timestamps[user_name][0]:
                        user_timestamps[user_name][0] = ts

                listen_counts[user_name] += 1
    except psycopg2.OperationalError as e:
        # BUG FIX: missing %s placeholder, and the copy-pasted message was
        # wrong — this query scans the listen table, not the user list.
        logger.error("Cannot query ts to scan listen table: %s" % str(e), exc_info=True)
        raise

    logger.info("Setting updated cache entries.")
    # Set the timestamps and listen counts for all users
    for user_name in user_list:
        try:
            cache.increment(REDIS_USER_LISTEN_COUNT + user_name,
                            amount=listen_counts[user_name])
        except KeyError:
            pass

        try:
            # Merge with any timestamps written concurrently since the reset.
            tss = cache.get(REDIS_USER_TIMESTAMPS + user_name)
            (min_ts, max_ts) = tss.split(",")
            min_ts = int(min_ts)
            max_ts = int(max_ts)
            # user_timestamps[user_name] raises KeyError for users with no
            # listens; the except below skips them deliberately.
            if min_ts and min_ts < user_timestamps[user_name][0]:
                user_timestamps[user_name][0] = min_ts
            if max_ts and max_ts > user_timestamps[user_name][1]:
                user_timestamps[user_name][1] = max_ts
            cache.set(
                REDIS_USER_TIMESTAMPS + user_name,
                "%d,%d" % (user_timestamps[user_name][0],
                           user_timestamps[user_name][1]), expirein=0)
        except KeyError:
            pass