def create_app(self):
    app = create_app(config_path=os.path.join(
        os.path.dirname(os.path.realpath(__file__)), '..', 'test_config.py'
    ))
    app.config['TESTING'] = True
    return app
def create(location, threads):
    """ Create a ListenBrainz data dump which includes a private dump, a statistics dump
        and a dump of the actual listens from InfluxDB.

        Args:
            location (str): path to the directory where the dump should be made
            threads (int): the number of threads to use during compression
    """
    app = create_app()
    with app.app_context():
        ls = init_influx_connection(current_app.logger, {
            'REDIS_HOST': current_app.config['REDIS_HOST'],
            'REDIS_PORT': current_app.config['REDIS_PORT'],
            'REDIS_NAMESPACE': current_app.config['REDIS_NAMESPACE'],
            'INFLUX_HOST': current_app.config['INFLUX_HOST'],
            'INFLUX_PORT': current_app.config['INFLUX_PORT'],
            'INFLUX_DB_NAME': current_app.config['INFLUX_DB_NAME'],
        })
        time_now = datetime.today()
        dump_path = os.path.join(location, 'listenbrainz-dump-{time}'.format(time=time_now.strftime('%Y%m%d-%H%M%S')))
        create_path(dump_path)
        db_dump.dump_postgres_db(dump_path, time_now, threads)
        ls.dump_listens(dump_path, time_now, threads)
        try:
            write_hashes(dump_path)
        except IOError as e:
            current_app.logger.error('Unable to create hash files! Error: %s', str(e), exc_info=True)
            return
        current_app.logger.info('Dumps created and hashes written at %s', dump_path)
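# write_hashes() is called above but not defined in this section. A minimal sketch of
# what it plausibly does, assuming it writes a .md5 and a .sha256 checksum file next to
# every file in the dump directory; the file naming and output format here are
# assumptions, not the verified ListenBrainz implementation.
import hashlib
import os

def write_hashes(location):
    """Write <file>.md5 and <file>.sha256 checksum files for every file in location."""
    for entry in os.listdir(location):
        path = os.path.join(location, entry)
        if not os.path.isfile(path):
            continue
        for algorithm in ('md5', 'sha256'):
            hasher = hashlib.new(algorithm)
            with open(path, 'rb') as f:
                # hash in 1 MiB chunks so large dump archives need not fit in memory
                for chunk in iter(lambda: f.read(1024 * 1024), b''):
                    hasher.update(chunk)
            with open('{}.{}'.format(path, algorithm), 'w') as out:
                out.write('{}  {}\n'.format(hasher.hexdigest(), entry))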
def runserver(host, port, debug=False):
    application = webserver.create_app()
    run_simple(
        hostname=host,
        port=port,
        application=application,
        use_debugger=debug,
        use_reloader=debug,
        processes=5
    )
def start(self):
    app = create_app()
    with app.app_context():
        current_app.logger.info("bigquery-writer init")
        self._verify_hosts_in_config()

        # if we're not supposed to run, just sleep
        if not current_app.config['WRITE_TO_BIGQUERY']:
            sleep(66666)
            return

        try:
            self.bigquery = create_bigquery_object()
        except NoCredentialsFileException:
            current_app.logger.critical("BigQuery credential file not present! Sleeping...")
            sleep(100000)
        except NoCredentialsVariableException:
            current_app.logger.critical("BigQuery credentials environment variable not set!")
            sleep(100000)

        while True:
            try:
                self.redis = Redis(
                    host=current_app.config['REDIS_HOST'],
                    port=current_app.config['REDIS_PORT'],
                )
                self.redis.ping()
                break
            except Exception as err:
                current_app.logger.warning("Cannot connect to redis: %s. Retrying in %s seconds.", str(err), self.ERROR_RETRY_DELAY, exc_info=True)
                sleep(self.ERROR_RETRY_DELAY)

        while True:
            self.connect_to_rabbitmq()
            self.channel = self.connection.channel()
            self.channel.exchange_declare(exchange=current_app.config['UNIQUE_EXCHANGE'], exchange_type='fanout')
            self.channel.queue_declare(current_app.config['UNIQUE_QUEUE'], durable=True)
            self.channel.queue_bind(exchange=current_app.config['UNIQUE_EXCHANGE'], queue=current_app.config['UNIQUE_QUEUE'])
            self.channel.basic_consume(
                lambda ch, method, properties, body: self.static_callback(ch, method, properties, body, obj=self),
                queue=current_app.config['UNIQUE_QUEUE'],
            )
            self.channel.basic_qos(prefetch_count=PREFETCH_COUNT)

            current_app.logger.info("bigquery-writer started")
            try:
                self.channel.start_consuming()
            except pika.exceptions.ConnectionClosed:
                current_app.logger.warning("Connection to rabbitmq closed. Re-opening.")
                self.connection = None
                self.channel = None
                continue

            self.connection.close()
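# connect_to_rabbitmq() is called above but not shown. A minimal sketch using pika's
# blocking connection, in the same retry-until-available style the class uses for
# redis; RABBITMQ_PORT is an assumed config key, and the real implementation may also
# pass credentials and a virtual host.
import pika
from time import sleep
from flask import current_app

def connect_to_rabbitmq(self):
    """Open a blocking connection to RabbitMQ, retrying until it succeeds."""
    while True:
        try:
            self.connection = pika.BlockingConnection(pika.ConnectionParameters(
                host=current_app.config['RABBITMQ_HOST'],
                port=current_app.config['RABBITMQ_PORT'],  # assumed config key
            ))
            return
        except Exception as err:
            current_app.logger.error("Cannot connect to rabbitmq: %s. Retrying in %s seconds.",
                                     str(err), self.ERROR_RETRY_DELAY, exc_info=True)
            sleep(self.ERROR_RETRY_DELAY)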
def test_verify_hosts_in_config(self, mock_sleep):
    """ Test for the _verify_hosts_in_config method """
    app1 = create_app()
    app2 = create_app()

    if "REDIS_HOST" in app1.config:
        app1.config.pop("REDIS_HOST")
    with self.assertRaises(SystemExit) as s:
        with app1.app_context():
            self.lwriter._verify_hosts_in_config()
    self.assertEqual(s.exception.code, -1)

    if "RABBITMQ_HOST" in app2.config:
        app2.config.pop("RABBITMQ_HOST")
    with self.assertRaises(SystemExit) as s:
        with app2.app_context():
            self.lwriter._verify_hosts_in_config()
    self.assertEqual(s.exception.code, -1)
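# The _verify_hosts_in_config method under test is not shown in this section. A minimal
# sketch consistent with the test's expectations: it exits with code -1 when REDIS_HOST
# or RABBITMQ_HOST is missing from the app config. The log message is an assumption.
import sys
from flask import current_app

def _verify_hosts_in_config(self):
    """Exit with code -1 if a required service host is missing from the config."""
    for key in ('REDIS_HOST', 'RABBITMQ_HOST'):
        if key not in current_app.config:
            current_app.logger.critical("%s is not defined in the config!", key)
            sys.exit(-1)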
def import_dump(private_archive, public_archive, listen_archive, threads):
    """ Import a ListenBrainz dump into the database.

        Note: This method tries to import the private db dump first, followed by the
        public db dump. However, in absence of a private dump, it imports sanitized
        versions of the user table in the public dump in order to satisfy foreign key
        constraints. Then it imports the listen dump.

        Args:
            private_archive (str): the path to the ListenBrainz private dump to be imported
            public_archive (str): the path to the ListenBrainz public dump to be imported
            listen_archive (str): the path to the ListenBrainz listen dump archive to be imported
            threads (int): the number of threads to use during decompression, defaults to 1
    """
    if not private_archive and not public_archive and not listen_archive:
        print('You need to enter a path to the archive(s) to import!')
        sys.exit(1)

    app = create_app()
    with app.app_context():
        db_dump.import_postgres_dump(private_archive, public_archive, threads)
        ls = init_influx_connection(current_app.logger, {
            'REDIS_HOST': current_app.config['REDIS_HOST'],
            'REDIS_PORT': current_app.config['REDIS_PORT'],
            'REDIS_NAMESPACE': current_app.config['REDIS_NAMESPACE'],
            'INFLUX_HOST': current_app.config['INFLUX_HOST'],
            'INFLUX_PORT': current_app.config['INFLUX_PORT'],
            'INFLUX_DB_NAME': current_app.config['INFLUX_DB_NAME'],
        })
        try:
            ls.import_listens_dump(listen_archive, threads)
        except IOError as e:
            current_app.logger.critical('IOError while trying to import data into Influx: %s', str(e), exc_info=True)
            raise
        except InfluxDBClientError as e:
            current_app.logger.critical('Error while sending data to Influx: %s', str(e), exc_info=True)
            raise
        except InfluxDBServerError as e:
            current_app.logger.critical('InfluxDB Server Error while importing data: %s', str(e), exc_info=True)
            raise
        except Exception as e:
            current_app.logger.critical('Unexpected error while importing data: %s', str(e), exc_info=True)
            raise
def init_db(force, create_db):
    """Initializes the database.

    This process involves several steps:
    1. Table structure is created.
    2. Primary keys and foreign keys are created.
    3. Indexes are created.
    """
    db.init_db_connection(config.POSTGRES_ADMIN_URI)
    if force:
        res = db.run_sql_script_without_transaction(os.path.join(ADMIN_SQL_DIR, 'drop_db.sql'))
        if not res:
            raise Exception('Failed to drop existing database and user! Exit code: %i' % res)

    if create_db:
        print('Creating user and a database...')
        res = db.run_sql_script_without_transaction(os.path.join(ADMIN_SQL_DIR, 'create_db.sql'))
        if not res:
            raise Exception('Failed to create new database and user! Exit code: %i' % res)

        print('Creating database extensions...')
        res = db.run_sql_script_without_transaction(os.path.join(ADMIN_SQL_DIR, 'create_extensions.sql'))
        # Don't raise an exception if the extension already exists

    application = webserver.create_app()
    with application.app_context():
        print('Creating schema...')
        db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_schema.sql'))

        print('Creating tables...')
        db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_tables.sql'))

        print('Creating primary and foreign keys...')
        db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_primary_keys.sql'))
        db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_foreign_keys.sql'))

        print('Creating indexes...')
        db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_indexes.sql'))

        print("Done!")
def setUp(self):
    super(HandlersTestCase, self).setUp()
    self.app = create_app()
    db_user.create(1, 'iliekcomputers')
    db_user.create(2, 'lucifer')
    self.maxDiff = None
def start(self):
    app = create_app()
    with app.app_context():
        current_app.logger.info("influx-writer init")
        self._verify_hosts_in_config()

        if "INFLUX_HOST" not in current_app.config:
            current_app.logger.critical("Influx service not defined. Sleeping {0} seconds and exiting.".format(self.ERROR_RETRY_DELAY))
            sleep(self.ERROR_RETRY_DELAY)
            sys.exit(-1)

        while True:
            try:
                self.ls = InfluxListenStore({
                    'REDIS_HOST': current_app.config['REDIS_HOST'],
                    'REDIS_PORT': current_app.config['REDIS_PORT'],
                    'REDIS_NAMESPACE': current_app.config['REDIS_NAMESPACE'],
                    'INFLUX_HOST': current_app.config['INFLUX_HOST'],
                    'INFLUX_PORT': current_app.config['INFLUX_PORT'],
                    'INFLUX_DB_NAME': current_app.config['INFLUX_DB_NAME'],
                }, logger=current_app.logger)
                self.influx = InfluxDBClient(
                    host=current_app.config['INFLUX_HOST'],
                    port=current_app.config['INFLUX_PORT'],
                    database=current_app.config['INFLUX_DB_NAME'],
                )
                break
            except Exception as err:
                current_app.logger.error("Cannot connect to influx: %s. Retrying in %s seconds.", str(err), self.ERROR_RETRY_DELAY, exc_info=True)
                sleep(self.ERROR_RETRY_DELAY)

        while True:
            try:
                self.redis = Redis(host=current_app.config['REDIS_HOST'], port=current_app.config['REDIS_PORT'], decode_responses=True)
                self.redis.ping()
                break
            except Exception as err:
                current_app.logger.error("Cannot connect to redis: %s. Retrying in %s seconds.", str(err), self.ERROR_RETRY_DELAY, exc_info=True)
                sleep(self.ERROR_RETRY_DELAY)

        while True:
            self.connect_to_rabbitmq()
            self.incoming_ch = self.connection.channel()
            self.incoming_ch.exchange_declare(exchange=current_app.config['INCOMING_EXCHANGE'], exchange_type='fanout')
            self.incoming_ch.queue_declare(current_app.config['INCOMING_QUEUE'], durable=True)
            self.incoming_ch.queue_bind(exchange=current_app.config['INCOMING_EXCHANGE'], queue=current_app.config['INCOMING_QUEUE'])
            self.incoming_ch.basic_consume(
                lambda ch, method, properties, body: self.static_callback(ch, method, properties, body, obj=self),
                queue=current_app.config['INCOMING_QUEUE'],
            )

            self.unique_ch = self.connection.channel()
            self.unique_ch.exchange_declare(exchange=current_app.config['UNIQUE_EXCHANGE'], exchange_type='fanout')

            current_app.logger.info("influx-writer started")
            try:
                self.incoming_ch.start_consuming()
            except pika.exceptions.ConnectionClosed:
                current_app.logger.warning("Connection to rabbitmq closed. Re-opening.", exc_info=True)
                self.connection = None
                continue

            self.connection.close()
def create_incremental(location, threads, dump_id):
    app = create_app()
    with app.app_context():
        from listenbrainz.webserver.timescale_connection import _ts as ls
        if dump_id is None:
            end_time = datetime.now()
            dump_id = db_dump.add_dump_entry(int(end_time.strftime('%s')))
        else:
            dump_entry = db_dump.get_dump_entry(dump_id)
            if dump_entry is None:
                current_app.logger.error("No dump with ID %d found, exiting!", dump_id)
                sys.exit(-1)
            end_time = dump_entry['created']

        prev_dump_entry = db_dump.get_dump_entry(dump_id - 1)
        if prev_dump_entry is None:  # incremental dumps must have a previous dump in the series
            current_app.logger.error("Invalid dump ID %d, could not find previous dump", dump_id)
            sys.exit(-1)
        start_time = prev_dump_entry['created']
        current_app.logger.info("Dumping data from %s to %s", start_time, end_time)

        dump_name = 'listenbrainz-dump-{dump_id}-{time}-incremental'.format(
            dump_id=dump_id, time=end_time.strftime('%Y%m%d-%H%M%S'))
        dump_path = os.path.join(location, dump_name)
        create_path(dump_path)

        listens_dump_file = ls.dump_listens(dump_path, dump_id=dump_id, start_time=start_time, end_time=end_time, threads=threads)
        spark_dump_file = 'listenbrainz-listens-dump-{dump_id}-{time}-spark-incremental.tar.xz'.format(
            dump_id=dump_id, time=end_time.strftime('%Y%m%d-%H%M%S'))
        spark_dump_path = os.path.join(location, dump_path, spark_dump_file)
        transmogrify_dump_file_to_spark_import_format(listens_dump_file, spark_dump_path, threads)

        try:
            write_hashes(dump_path)
        except IOError as e:
            current_app.logger.error('Unable to create hash files! Error: %s', str(e), exc_info=True)
            sys.exit(-1)

        try:
            if not sanity_check_dumps(dump_path, 6):
                sys.exit(-1)
        except OSError:
            sys.exit(-1)

        # if in production, send an email to interested people for observability
        send_dump_creation_notification(dump_name, 'incremental')

        # Write the DUMP_ID file so that the FTP sync scripts can be more robust
        with open(os.path.join(dump_path, "DUMP_ID.txt"), "w") as f:
            f.write("%s %s incremental\n" % (end_time.strftime('%Y%m%d-%H%M%S'), dump_id))

        current_app.logger.info('Dumps created and hashes written at %s', dump_path)
        sys.exit(0)
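# sanity_check_dumps() is used above (expecting 6 files for an incremental dump: two
# archives plus an .md5 and a .sha256 for each) but is not defined in this section. A
# minimal sketch under that assumption; the real check may inspect file names and sizes
# more carefully.
import os

def sanity_check_dumps(dump_path, expected_count):
    """Return True if dump_path holds exactly expected_count non-empty files."""
    files = [f for f in os.listdir(dump_path) if os.path.isfile(os.path.join(dump_path, f))]
    if len(files) != expected_count:
        print('Expected %d files in %s, found %d!' % (expected_count, dump_path, len(files)))
        return False
    for f in files:
        if os.path.getsize(os.path.join(dump_path, f)) == 0:
            print('Dump file %s is empty!' % f)
            return False
    return True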
def setUp(self):
    super().setUp()
    self.tempdir = tempfile.mkdtemp()
    self.app = create_app()
def setUp(self):
    self.app = create_app()
def calculate_user_similarity():
    application = webserver.create_app()
    with application.app_context():
        user_similarity.calculate_similar_users()
def create_app(self):
    app = create_app()
    app.config['TESTING'] = True
    return app
from listenbrainz import db
from brainzutils import musicbrainz_db
from listenbrainz.webserver import create_app
from listenbrainz.webserver.views.api_tools import publish_data_to_queue
from listenbrainz.listenstore import InfluxListenStore
from listenbrainz.webserver.influx_connection import init_influx_connection
from werkzeug.exceptions import NotFound
import listenbrainz.db.user as db_user
import logging
import sqlalchemy

app = create_app()
influx = init_influx_connection(logging, {
    'REDIS_HOST': app.config['REDIS_HOST'],
    'REDIS_PORT': app.config['REDIS_PORT'],
    'REDIS_NAMESPACE': app.config['REDIS_NAMESPACE'],
    'INFLUX_HOST': app.config['INFLUX_HOST'],
    'INFLUX_PORT': app.config['INFLUX_PORT'],
    'INFLUX_DB_NAME': app.config['INFLUX_DB_NAME'],
})

def update_row_ids_for_exceptions():
    with musicbrainz_db.engine.connect() as mb_connection:
        with db.engine.connect() as connection:
            # 2106 - Fée Deuspi
            result = mb_connection.execute(sqlalchemy.text("""
                SELECT id
                  FROM editor
                 WHERE name = 'Fée Deuspi'
            """))
def setUp(self):
    super(BigQueryJobRunnerTestCase, self).setUp()
    self.app = create_app()  # create a flask app for config purposes
    self.sc = BigQueryJobRunner()
    self.sc.bigquery = MagicMock()
    self.user = db_user.get_or_create(21, 'stats_calculator_test_user')
def __init__(self, user_name):
    self.user_name = user_name
    self.max_time = datetime.now()
    self.app = create_app()
def __init__(self):
    self.app = create_app()
def __init__(self):
    self.app = create_app()  # creating a flask app for config values and logging to Sentry
def update_user_emails():
    from listenbrainz.webserver.login import copy_files_from_mb_to_lb
    application = webserver.create_app()
    with application.app_context():
        copy_files_from_mb_to_lb.copy_emails()
def check_ftp_dump_ages():
    """ Fetch the FTP dir listing of the full and incremental dumps and check their
        ages. Send mail to the observability list in case the dumps are too old.
    """
    # accumulate failures in msg so a single mail covers all dump types
    msg = ""
    try:
        latest_file = _fetch_latest_file_info_from_ftp_dir(
            MAIN_FTP_SERVER_URL, '/pub/musicbrainz/listenbrainz/fullexport')
        id, dt = _parse_ftp_name_with_id(latest_file)
        age = datetime.now() - dt
        if age > timedelta(days=FULLEXPORT_MAX_AGE):
            line = "Full dump %d is more than %d days old: %s\n" % (id, FULLEXPORT_MAX_AGE, str(age))
            print(line, end="")
            msg += line
        else:
            print("Full dump %s is %s old, good!" % (id, str(age)))
    except Exception as err:
        msg += "Cannot fetch full dump age: %s\n\n%s" % (str(err), traceback.format_exc())

    try:
        latest_file = _fetch_latest_file_info_from_ftp_dir(
            MAIN_FTP_SERVER_URL, '/pub/musicbrainz/listenbrainz/incremental')
        id, dt = _parse_ftp_name_with_id(latest_file)
        age = datetime.now() - dt
        if age > timedelta(hours=INCREMENTAL_MAX_AGE):
            line = "Incremental dump %s is more than %s hours old: %s\n" % (id, INCREMENTAL_MAX_AGE, str(age))
            print(line, end="")
            msg += line
        else:
            print("Incremental dump %s is %s old, good!" % (id, str(age)))
    except Exception as err:
        msg += "Cannot fetch incremental dump age: %s\n\n%s" % (str(err), traceback.format_exc())

    try:
        latest_file = _fetch_latest_file_info_from_ftp_dir(
            MAIN_FTP_SERVER_URL, '/pub/musicbrainz/listenbrainz/spark')
        id, dt = _parse_ftp_name_without_id(latest_file)
        age = datetime.now() - dt
        if age > timedelta(days=FEEDBACK_MAX_AGE):
            line = "Feedback dump %s is more than %s days old: %s\n" % (id, FEEDBACK_MAX_AGE, str(age))
            print(line, end="")
            msg += line
        else:
            print("Feedback dump %s is %s old, good!" % (id, str(age)))
    except Exception as err:
        msg += "Cannot fetch feedback dump age: %s\n\n%s" % (str(err), traceback.format_exc())

    app = create_app()
    with app.app_context():
        if not current_app.config['TESTING'] and msg:
            send_mail(
                subject="ListenBrainz outdated dumps!",
                text=render_template('emails/data_dump_outdated.txt', msg=msg),
                recipients=['*****@*****.**'],
                from_name='ListenBrainz',
                from_addr='noreply@' + current_app.config['MAIL_FROM_DOMAIN'],
            )
        elif msg:
            print(msg)
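# The FTP helpers used above are not shown in this section. A minimal sketch with
# Python's stdlib ftplib, assuming dump directories are named like
# 'listenbrainz-dump-<id>-<YYYYmmdd-HHMMSS>-full' and that the lexicographically last
# entry is the newest; both assumptions are inferred from the calling code, not verified.
from datetime import datetime
from ftplib import FTP

def _fetch_latest_file_info_from_ftp_dir(server, path):
    """Return the name of the newest entry in the given FTP directory."""
    ftp = FTP(server)
    ftp.login()  # anonymous login
    ftp.cwd(path)
    entries = ftp.nlst()
    ftp.quit()
    return sorted(entries)[-1]

def _parse_ftp_name_with_id(name):
    """Parse 'listenbrainz-dump-<id>-<YYYYmmdd-HHMMSS>-full' into (id, datetime)."""
    parts = name.split('-')
    return int(parts[2]), datetime.strptime(parts[3] + '-' + parts[4], '%Y%m%d-%H%M%S')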
def setUp(self):
    DatabaseTestCase.setUp(self)
    TimescaleTestCase.setUp(self)
    self.log = logging.getLogger(__name__)
    self.app = create_app()
    self.logstore = TimescaleListenStore(self.log)
from listenbrainz.webserver import create_app

application = create_app()
def __init__(self):
    self.app = create_app(debug=True)  # creating a flask app for config values
    self.log = logging.getLogger(__name__)
    logging.basicConfig()
    self.log.setLevel(logging.INFO)
def setUp(self):
    self.app = create_app(debug=True)  # create an app for config value access
def create_full(location, threads, dump_id, last_dump_id):
    """ Create a ListenBrainz data dump which includes a private dump, a statistics dump
        and a dump of the actual listens from InfluxDB.

        Args:
            location (str): path to the directory where the dump should be made
            threads (int): the number of threads to use during compression
            dump_id (int): the ID of the ListenBrainz data dump
            last_dump_id (bool): flag indicating whether to create a full dump from the last entry in the dump table
    """
    app = create_app()
    with app.app_context():
        ls = init_influx_connection(current_app.logger, {
            'REDIS_HOST': current_app.config['REDIS_HOST'],
            'REDIS_PORT': current_app.config['REDIS_PORT'],
            'REDIS_NAMESPACE': current_app.config['REDIS_NAMESPACE'],
            'INFLUX_HOST': current_app.config['INFLUX_HOST'],
            'INFLUX_PORT': current_app.config['INFLUX_PORT'],
            'INFLUX_DB_NAME': current_app.config['INFLUX_DB_NAME'],
        })

        if last_dump_id:
            all_dumps = db_dump.get_dump_entries()
            if len(all_dumps) == 0:
                current_app.logger.error("Cannot create full dump with last dump's ID, no dump exists!")
                sys.exit(-1)
            dump_id = all_dumps[0]['id']

        if dump_id is None:
            end_time = datetime.now()
            dump_id = db_dump.add_dump_entry(int(end_time.strftime('%s')))
        else:
            dump_entry = db_dump.get_dump_entry(dump_id)
            if dump_entry is None:
                current_app.logger.error("No dump with ID %d found", dump_id)
                sys.exit(-1)
            end_time = dump_entry['created']

        dump_path = os.path.join(location, 'listenbrainz-dump-{dump_id}-{time}-full'.format(
            dump_id=dump_id, time=end_time.strftime('%Y%m%d-%H%M%S')))
        create_path(dump_path)
        db_dump.dump_postgres_db(dump_path, end_time, threads)
        ls.dump_listens(dump_path, dump_id=dump_id, end_time=end_time, threads=threads, spark_format=False)
        ls.dump_listens(dump_path, dump_id=dump_id, end_time=end_time, threads=threads, spark_format=True)

        try:
            write_hashes(dump_path)
        except IOError as e:
            current_app.logger.error('Unable to create hash files! Error: %s', str(e), exc_info=True)
            return

        current_app.logger.info('Dumps created and hashes written at %s', dump_path)
def init_ts_db(force, create_db):
    """Initializes the Timescale database.

    This process involves several steps:
    1. Table structure is created.
    2. Indexes are created.
    3. Views are created.
    """
    from listenbrainz import config
    ts.init_db_connection(config.TIMESCALE_ADMIN_URI)
    if force:
        res = ts.run_sql_script_without_transaction(os.path.join(TIMESCALE_SQL_DIR, 'drop_db.sql'))
        if not res:
            raise Exception('Failed to drop existing database and user! Exit code: %i' % res)

    if create_db or force:
        print('TS: Creating user and a database...')
        retries = 0
        while True:
            try:
                res = ts.run_sql_script_without_transaction(os.path.join(TIMESCALE_SQL_DIR, 'create_db.sql'))
                break
            except sqlalchemy.exc.OperationalError:
                print("Trapped template1 access error, FFS! Sleeping, trying again.")
                retries += 1
                if retries == 5:
                    raise
                sleep(1)
                continue

        if not res:
            raise Exception('Failed to create new database and user! Exit code: %i' % res)

        ts.init_db_connection(config.TIMESCALE_ADMIN_LB_URI)
        print('TS: Creating database extensions...')
        res = ts.run_sql_script_without_transaction(os.path.join(TIMESCALE_SQL_DIR, 'create_extensions.sql'))
        # Don't raise an exception if the extension already exists

    ts.init_db_connection(config.SQLALCHEMY_TIMESCALE_URI)

    application = webserver.create_app()
    with application.app_context():
        print('TS: Creating Schemas...')
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR, 'create_schemas.sql'))

        print('TS: Creating Types...')
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR, 'create_types.sql'))

        print('TS: Creating tables...')
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR, 'create_tables.sql'))

        print('TS: Creating Functions...')
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR, 'create_functions.sql'))

        print('TS: Creating views...')
        ts.run_sql_script_without_transaction(os.path.join(TIMESCALE_SQL_DIR, 'create_views.sql'))

        print('TS: Creating indexes...')
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR, 'create_indexes.sql'))
        ts.create_view_indexes()

        print('TS: Creating Primary and Foreign Keys...')
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR, 'create_primary_keys.sql'))
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR, 'create_foreign_keys.sql'))

        print("Done!")
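# The create_db block above retries on sqlalchemy.exc.OperationalError with a fixed
# delay, a pattern that recurs throughout this codebase. A small generic sketch of the
# same idea; the helper name and signature are illustrative, not part of the codebase.
import time

def run_with_retries(func, max_retries=5, delay=1, exceptions=(Exception,)):
    """Call func(), retrying up to max_retries times on the given exceptions."""
    for attempt in range(max_retries):
        try:
            return func()
        except exceptions:
            if attempt == max_retries - 1:
                raise
            time.sleep(delay)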
def update_user_listen_data():
    """ Scan the listen table and update listen metadata for all users """
    application = webserver.create_app()
    with application.app_context():
        ts_update_user_listen_data()
def set_rate_limits(per_token_limit, per_ip_limit, window_size):
    from brainzutils.ratelimit import set_rate_limits
    application = webserver.create_app()
    with application.app_context():
        set_rate_limits(per_token_limit, per_ip_limit, window_size)
def delete_pending_listens():
    """ Complete all pending listen deletes since last cron run """
    application = webserver.create_app()
    with application.app_context():
        ts_delete_listens()
def create_full(location, threads, dump_id, do_listen_dump: bool, do_spark_dump: bool, do_db_dump: bool):
    """ Create a ListenBrainz data dump which includes a private dump, a statistics dump
        and a dump of the actual listens from the listenstore.

        Args:
            location (str): path to the directory where the dump should be made
            threads (int): the number of threads to use during compression
            dump_id (int): the ID of the ListenBrainz data dump
            do_listen_dump: If True, make a listens dump
            do_spark_dump: If True, make a spark listens dump
            do_db_dump: If True, make a public/private postgres/timescale dump
    """
    app = create_app()
    with app.app_context():
        ls = DumpListenStore(app)
        if dump_id is None:
            end_time = datetime.now()
            dump_id = db_dump.add_dump_entry(int(end_time.strftime('%s')))
        else:
            dump_entry = db_dump.get_dump_entry(dump_id)
            if dump_entry is None:
                current_app.logger.error("No dump with ID %d found", dump_id)
                sys.exit(-1)
            end_time = dump_entry['created']

        ts = end_time.strftime('%Y%m%d-%H%M%S')
        dump_name = 'listenbrainz-dump-{dump_id}-{time}-full'.format(dump_id=dump_id, time=ts)
        dump_path = os.path.join(location, dump_name)
        create_path(dump_path)

        expected_num_dumps = 0
        if do_db_dump:
            db_dump.dump_postgres_db(dump_path, end_time, threads)
            expected_num_dumps += 4
        if do_listen_dump:
            ls.dump_listens(dump_path, dump_id=dump_id, end_time=end_time, threads=threads)
            expected_num_dumps += 1
        if do_spark_dump:
            ls.dump_listens_for_spark(dump_path, dump_id=dump_id, dump_type="full", end_time=end_time)
            expected_num_dumps += 1

        try:
            write_hashes(dump_path)
        except IOError as e:
            current_app.logger.error('Unable to create hash files! Error: %s', str(e), exc_info=True)
            sys.exit(-1)

        try:
            # each dump produces an archive plus an md5 and a sha256 hash file
            expected_num_dump_files = expected_num_dumps * 3
            if not sanity_check_dumps(dump_path, expected_num_dump_files):
                sys.exit(-1)
        except OSError:
            sys.exit(-1)

        current_app.logger.info('Dumps created and hashes written at %s', dump_path)

        # Write the DUMP_ID file so that the FTP sync scripts can be more robust
        with open(os.path.join(dump_path, "DUMP_ID.txt"), "w") as f:
            f.write("%s %s full\n" % (ts, dump_id))

        # if in production, send an email to interested people for observability
        send_dump_creation_notification(dump_name, 'fullexport')

        sys.exit(0)
def delete_listens_and_update_metadata():
    """ Complete all pending listen deletes and also run the update script for
        updating listen metadata since last cron run
    """
    application = webserver.create_app()
    with application.app_context():
        ts_delete_listens_and_update_user_listen_data()
def start(self):
    app = create_app()
    with app.app_context():
        current_app.logger.info("timescale-writer init")
        self._verify_hosts_in_config()

        if "SQLALCHEMY_TIMESCALE_URI" not in current_app.config:
            current_app.logger.critical("Timescale service not defined. Sleeping {0} seconds and exiting.".format(self.ERROR_RETRY_DELAY))
            sleep(self.ERROR_RETRY_DELAY)
            sys.exit(-1)

        try:
            while True:
                try:
                    self.ls = TimescaleListenStore({
                        'REDIS_HOST': current_app.config['REDIS_HOST'],
                        'REDIS_PORT': current_app.config['REDIS_PORT'],
                        'REDIS_NAMESPACE': current_app.config['REDIS_NAMESPACE'],
                        'SQLALCHEMY_TIMESCALE_URI': current_app.config['SQLALCHEMY_TIMESCALE_URI'],
                    }, logger=current_app.logger)
                    break
                except Exception as err:
                    current_app.logger.error("Cannot connect to timescale: %s. Retrying in %s seconds.", str(err), self.ERROR_RETRY_DELAY, exc_info=True)
                    sleep(self.ERROR_RETRY_DELAY)

            while True:
                try:
                    self.redis = Redis(host=current_app.config['REDIS_HOST'], port=current_app.config['REDIS_PORT'], decode_responses=True)
                    self.redis.ping()
                    self.redis_listenstore = RedisListenStore(current_app.logger, current_app.config)
                    break
                except Exception as err:
                    current_app.logger.error("Cannot connect to redis: %s. Retrying in %s seconds.", str(err), self.ERROR_RETRY_DELAY, exc_info=True)
                    sleep(self.ERROR_RETRY_DELAY)

            while True:
                self.connect_to_rabbitmq()
                self.incoming_ch = self.connection.channel()
                self.incoming_ch.exchange_declare(exchange=current_app.config['INCOMING_EXCHANGE'], exchange_type='fanout')
                self.incoming_ch.queue_declare(current_app.config['INCOMING_QUEUE'], durable=True)
                self.incoming_ch.queue_bind(exchange=current_app.config['INCOMING_EXCHANGE'], queue=current_app.config['INCOMING_QUEUE'])
                self.incoming_ch.basic_consume(
                    queue=current_app.config['INCOMING_QUEUE'],
                    on_message_callback=lambda ch, method, properties, body: self.static_callback(ch, method, properties, body, obj=self),
                )

                self.unique_ch = self.connection.channel()
                self.unique_ch.exchange_declare(exchange=current_app.config['UNIQUE_EXCHANGE'], exchange_type='fanout')

                try:
                    self.incoming_ch.start_consuming()
                except pika.exceptions.ConnectionClosed:
                    current_app.logger.warning("Connection to rabbitmq closed. Re-opening.", exc_info=True)
                    self.connection = None
                    continue

                self.connection.close()
        except Exception:
            current_app.logger.error("failed to start timescale loop:", exc_info=True)
def add_missing_to_listen_users_metadata():
    application = webserver.create_app()
    with application.app_context():
        ts_add_missing_to_listen_users_metadata()
def create_full(location, threads, dump_id, last_dump_id):
    """ Create a ListenBrainz data dump which includes a private dump, a statistics dump
        and a dump of the actual listens from the listenstore.

        Args:
            location (str): path to the directory where the dump should be made
            threads (int): the number of threads to use during compression
            dump_id (int): the ID of the ListenBrainz data dump
            last_dump_id (bool): flag indicating whether to create a full dump from the last entry in the dump table
    """
    app = create_app()
    with app.app_context():
        from listenbrainz.webserver.timescale_connection import _ts as ls
        if last_dump_id:
            all_dumps = db_dump.get_dump_entries()
            if len(all_dumps) == 0:
                current_app.logger.error("Cannot create full dump with last dump's ID, no dump exists!")
                sys.exit(-1)
            dump_id = all_dumps[0]['id']

        if dump_id is None:
            end_time = datetime.now()
            dump_id = db_dump.add_dump_entry(int(end_time.strftime('%s')))
        else:
            dump_entry = db_dump.get_dump_entry(dump_id)
            if dump_entry is None:
                current_app.logger.error("No dump with ID %d found", dump_id)
                sys.exit(-1)
            end_time = dump_entry['created']

        ts = end_time.strftime('%Y%m%d-%H%M%S')
        dump_name = 'listenbrainz-dump-{dump_id}-{time}-full'.format(dump_id=dump_id, time=ts)
        dump_path = os.path.join(location, dump_name)
        create_path(dump_path)

        db_dump.dump_postgres_db(dump_path, end_time, threads)

        listens_dump_file = ls.dump_listens(dump_path, dump_id=dump_id, end_time=end_time, threads=threads)
        spark_dump_file = 'listenbrainz-listens-dump-{dump_id}-{time}-spark-full.tar.xz'.format(dump_id=dump_id, time=ts)
        spark_dump_path = os.path.join(location, dump_path, spark_dump_file)
        transmogrify_dump_file_to_spark_import_format(listens_dump_file, spark_dump_path, threads)

        try:
            write_hashes(dump_path)
        except IOError as e:
            current_app.logger.error('Unable to create hash files! Error: %s', str(e), exc_info=True)
            sys.exit(-1)

        try:
            if not sanity_check_dumps(dump_path, 12):
                sys.exit(-1)
        except OSError:
            sys.exit(-1)

        # if in production, send an email to interested people for observability
        send_dump_creation_notification(dump_name, 'fullexport')

        current_app.logger.info('Dumps created and hashes written at %s', dump_path)

        # Write the DUMP_ID file so that the FTP sync scripts can be more robust
        with open(os.path.join(dump_path, "DUMP_ID.txt"), "w") as f:
            f.write("%s %s full\n" % (ts, dump_id))

        sys.exit(0)
def notify_yim_users():
    application = webserver.create_app()
    with application.app_context():
        from listenbrainz.db import year_in_music
        year_in_music.notify_yim_users()
def create_app(self):
    app = create_app(debug=False)
    app.config['TESTING'] = True
    return app