def create_app(self):
     app = create_app(config_path=os.path.join(
         os.path.dirname(os.path.realpath(__file__)),
         '..', 'test_config.py'
     ))
     app.config['TESTING'] = True
     return app
def create(location, threads):
    """ Create a ListenBrainz data dump which includes a private dump, a statistics dump
        and a dump of the actual listens from InfluxDB

        Args:
            location (str): path to the directory where the dump should be made
            threads (int): the number of threads to use during compression
    """
    app = create_app()
    with app.app_context():
        ls = init_influx_connection(current_app.logger,  {
            'REDIS_HOST': current_app.config['REDIS_HOST'],
            'REDIS_PORT': current_app.config['REDIS_PORT'],
            'REDIS_NAMESPACE': current_app.config['REDIS_NAMESPACE'],
            'INFLUX_HOST': current_app.config['INFLUX_HOST'],
            'INFLUX_PORT': current_app.config['INFLUX_PORT'],
            'INFLUX_DB_NAME': current_app.config['INFLUX_DB_NAME'],
        })
        time_now = datetime.today()
        dump_path = os.path.join(location, 'listenbrainz-dump-{time}'.format(time=time_now.strftime('%Y%m%d-%H%M%S')))
        create_path(dump_path)
        db_dump.dump_postgres_db(dump_path, time_now, threads)
        ls.dump_listens(dump_path, time_now, threads)
        try:
            write_hashes(dump_path)
        except IOError as e:
            current_app.logger.error('Unable to create hash files! Error: %s', str(e), exc_info=True)
            return
        current_app.logger.info('Dumps created and hashes written at %s' % dump_path)
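# A minimal, hypothetical sketch of the write_hashes() helper called by the dump commands in
# this file, assuming it writes .md5 and .sha256 side files for every file in the dump
# directory; the real listenbrainz helper may differ.
import hashlib
import os

def write_hashes_sketch(location):
    for entry in os.listdir(location):
        path = os.path.join(location, entry)
        if not os.path.isfile(path):
            continue
        with open(path, 'rb') as f:
            data = f.read()
        for algo in ('md5', 'sha256'):
            digest = hashlib.new(algo, data).hexdigest()
            # write "<digest>  <filename>" in the conventional checksum-file format
            with open(path + '.' + algo, 'w') as out:
                out.write('%s  %s\n' % (digest, entry))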
def runserver(host, port, debug=False):
    application = webserver.create_app()
    run_simple(
        hostname=host,
        port=port,
        application=application,
        use_debugger=debug,
        use_reloader=debug,
        processes=5
    )
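# Hypothetical CLI wiring for runserver(); ListenBrainz exposes its management functions
# through a command-line entry point, but the option names below are illustrative only.
import click

@click.command(name='runserver')
@click.option('--host', '-h', default='0.0.0.0')
@click.option('--port', '-p', type=int, default=8080)
@click.option('--debug', '-d', is_flag=True)
def runserver_command(host, port, debug):
    runserver(host, port, debug)

if __name__ == '__main__':
    runserver_command()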
    def start(self):
        app = create_app()
        with app.app_context():
            current_app.logger.info("bigquery-writer init")

            self._verify_hosts_in_config()

            # if we're not supposed to run, just sleep
            if not current_app.config['WRITE_TO_BIGQUERY']:
                sleep(66666)
                return

            try:
                self.bigquery = create_bigquery_object()
            except NoCredentialsFileException as e:
                current_app.logger.critical("BigQuery credential file not present! Sleeping...")
                sleep(100000)
            except NoCredentialsVariableException as e:
                current_app.logger.critical("BigQuery credentials environment variable not set!")
                sleep(100000)

            while True:
                try:
                    self.redis = Redis(
                        host=current_app.config['REDIS_HOST'],
                        port=current_app.config['REDIS_PORT'],
                    )
                    self.redis.ping()
                    break
                except Exception as err:
                    current_app.logger.warn("Cannot connect to redis: %s. Retrying in 3 seconds and trying again." % str(err), exc_info=True)
                    sleep(self.ERROR_RETRY_DELAY)

            while True:
                self.connect_to_rabbitmq()
                self.channel = self.connection.channel()
                self.channel.exchange_declare(exchange=current_app.config['UNIQUE_EXCHANGE'], exchange_type='fanout')
                self.channel.queue_declare(current_app.config['UNIQUE_QUEUE'], durable=True)
                self.channel.queue_bind(exchange=current_app.config['UNIQUE_EXCHANGE'], queue=current_app.config['UNIQUE_QUEUE'])
                self.channel.basic_consume(
                    lambda ch, method, properties, body: self.static_callback(ch, method, properties, body, obj=self),
                    queue=current_app.config['UNIQUE_QUEUE'],
                )
                self.channel.basic_qos(prefetch_count=PREFETCH_COUNT)

                current_app.logger.info("bigquery-writer started")
                try:
                    self.channel.start_consuming()
                except pika.exceptions.ConnectionClosed:
                    current_app.logger.warn("Connection to rabbitmq closed. Re-opening.")
                    self.connection = None
                    self.channel = None
                    continue

                self.connection.close()
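    # The basic_consume call above forwards deliveries to static_callback with obj=self.
    # A hypothetical shape of that callback; the real implementation may differ, and the
    # obj.callback() dispatch below is an assumption.
    @staticmethod
    def static_callback(ch, method, properties, body, obj):
        # hand the pika delivery back to the consumer instance
        return obj.callback(ch, method, properties, body)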
    def test_verify_hosts_in_config(self, mock_sleep):
        """ Test for the _verify_hosts_in_config method """

        app1 = create_app()
        app2 = create_app()

        if "REDIS_HOST" in app1.config:
            app1.config.pop("REDIS_HOST")

        with self.assertRaises(SystemExit) as s:
            with app1.app_context():
                self.lwriter._verify_hosts_in_config()
        # assert after the context manager exits; inside the block it would never run
        self.assertEqual(s.exception.code, -1)

        if "RABBITMQ_HOST" in app2.config:
            app2.config.pop("RABBITMQ_HOST")

        with self.assertRaises(SystemExit) as s:
            with app2.app_context():
                self.lwriter._verify_hosts_in_config()
        self.assertEqual(s.exception.code, -1)
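# A hypothetical reconstruction of _verify_hosts_in_config() based on the test above, which
# implies the method calls sys.exit(-1) whenever a required service host (such as REDIS_HOST
# or RABBITMQ_HOST) is missing from the app config; the real method may check additional keys.
import sys
from flask import current_app

def _verify_hosts_in_config(self, required_keys=("REDIS_HOST", "RABBITMQ_HOST")):
    for key in required_keys:
        if key not in current_app.config:
            current_app.logger.critical("%s is not defined in the config. Exiting!", key)
            sys.exit(-1)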
def import_dump(private_archive, public_archive, listen_archive, threads):
    """ Import a ListenBrainz dump into the database.

        Note: This method tries to import the private db dump first, followed by the public db
            dump. However, in absence of a private dump, it imports sanitized versions of the
            user table in the public dump in order to satisfy foreign key constraints.

        Then it imports the listen dump.

        Args:
            private_archive (str): the path to the ListenBrainz private dump to be imported
            public_archive (str): the path to the ListenBrainz public dump to be imported
            listen_archive (str): the path to the ListenBrainz listen dump archive to be imported
            threads (int): the number of threads to use during decompression, defaults to 1
    """
    if not private_archive and not public_archive and not listen_archive:
        print('You need to enter a path to the archive(s) to import!')
        sys.exit(1)

    app = create_app()
    with app.app_context():
        db_dump.import_postgres_dump(private_archive, public_archive, threads)

        ls = init_influx_connection(current_app.logger,  {
            'REDIS_HOST': current_app.config['REDIS_HOST'],
            'REDIS_PORT': current_app.config['REDIS_PORT'],
            'REDIS_NAMESPACE': current_app.config['REDIS_NAMESPACE'],
            'INFLUX_HOST': current_app.config['INFLUX_HOST'],
            'INFLUX_PORT': current_app.config['INFLUX_PORT'],
            'INFLUX_DB_NAME': current_app.config['INFLUX_DB_NAME'],
        })

        try:
            ls.import_listens_dump(listen_archive, threads)
        except IOError as e:
            current_app.logger.critical('IOError while trying to import data into Influx: %s', str(e), exc_info=True)
            raise
        except InfluxDBClientError as e:
            current_app.logger.critical('Error while sending data to Influx: %s', str(e), exc_info=True)
            raise
        except InfluxDBServerError as e:
            current_app.logger.critical('InfluxDB Server Error while importing data: %s', str(e), exc_info=True)
            raise
        except Exception as e:
            current_app.logger.critical('Unexpected error while importing data: %s', str(e), exc_info=True)
            raise
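# Hypothetical invocation of import_dump(); the archive paths below are illustrative only.
import_dump(
    private_archive='/data/dumps/listenbrainz-private-dump.tar.xz',
    public_archive='/data/dumps/listenbrainz-public-dump.tar.xz',
    listen_archive='/data/dumps/listenbrainz-listens-dump.tar.xz',
    threads=4,
)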
def init_db(force, create_db):
    """Initializes database.

    This process involves several steps:
    1. Table structure is created.
    2. Primary keys and foreign keys are created.
    3. Indexes are created.
    """

    db.init_db_connection(config.POSTGRES_ADMIN_URI)
    if force:
        res = db.run_sql_script_without_transaction(os.path.join(ADMIN_SQL_DIR, 'drop_db.sql'))
        if not res:
            raise Exception('Failed to drop existing database and user! Exit code: %i' % res)

    if create_db:
        print('Creating user and a database...')
        res = db.run_sql_script_without_transaction(os.path.join(ADMIN_SQL_DIR, 'create_db.sql'))
        if not res:
            raise Exception('Failed to create new database and user! Exit code: %i' % res)

        print('Creating database extensions...')
        res = db.run_sql_script_without_transaction(os.path.join(ADMIN_SQL_DIR, 'create_extensions.sql'))
    # Don't raise an exception if the extension already exists

    application = webserver.create_app()
    with application.app_context():
        print('Creating schema...')
        db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_schema.sql'))

        print('Creating tables...')
        db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_tables.sql'))

        print('Creating primary and foreign keys...')
        db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_primary_keys.sql'))
        db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_foreign_keys.sql'))

        print('Creating indexes...')
        db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_indexes.sql'))

        print("Done!")
Example #8
 def setUp(self):
     super(HandlersTestCase, self).setUp()
     self.app = create_app()
     db_user.create(1, 'iliekcomputers')
     db_user.create(2, 'lucifer')
     self.maxDiff = None
    def start(self):
        app = create_app()
        with app.app_context():
            current_app.logger.info("influx-writer init")
            self._verify_hosts_in_config()

            if "INFLUX_HOST" not in current_app.config:
                current_app.logger.critical("Influx service not defined. Sleeping {0} seconds and exiting.".format(self.ERROR_RETRY_DELAY))
                sleep(self.ERROR_RETRY_DELAY)
                sys.exit(-1)

            while True:
                try:
                    self.ls = InfluxListenStore({
                        'REDIS_HOST': current_app.config['REDIS_HOST'],
                        'REDIS_PORT': current_app.config['REDIS_PORT'],
                        'REDIS_NAMESPACE': current_app.config['REDIS_NAMESPACE'],
                        'INFLUX_HOST': current_app.config['INFLUX_HOST'],
                        'INFLUX_PORT': current_app.config['INFLUX_PORT'],
                        'INFLUX_DB_NAME': current_app.config['INFLUX_DB_NAME'],
                    }, logger=current_app.logger)
                    self.influx = InfluxDBClient(
                        host=current_app.config['INFLUX_HOST'],
                        port=current_app.config['INFLUX_PORT'],
                        database=current_app.config['INFLUX_DB_NAME'],
                    )
                    break
                except Exception as err:
                    current_app.logger.error("Cannot connect to influx: %s. Retrying in 2 seconds and trying again." % str(err), exc_info=True)
                    sleep(self.ERROR_RETRY_DELAY)

            while True:
                try:
                    self.redis = Redis(host=current_app.config['REDIS_HOST'], port=current_app.config['REDIS_PORT'], decode_responses=True)
                    self.redis.ping()
                    break
                except Exception as err:
                    current_app.logger.error("Cannot connect to redis: %s. Retrying in 2 seconds and trying again." % str(err), exc_info=True)
                    sleep(self.ERROR_RETRY_DELAY)

            while True:
                self.connect_to_rabbitmq()
                self.incoming_ch = self.connection.channel()
                self.incoming_ch.exchange_declare(exchange=current_app.config['INCOMING_EXCHANGE'], exchange_type='fanout')
                self.incoming_ch.queue_declare(current_app.config['INCOMING_QUEUE'], durable=True)
                self.incoming_ch.queue_bind(exchange=current_app.config['INCOMING_EXCHANGE'], queue=current_app.config['INCOMING_QUEUE'])
                self.incoming_ch.basic_consume(
                    lambda ch, method, properties, body: self.static_callback(ch, method, properties, body, obj=self),
                    queue=current_app.config['INCOMING_QUEUE'],
                )

                self.unique_ch = self.connection.channel()
                self.unique_ch.exchange_declare(exchange=current_app.config['UNIQUE_EXCHANGE'], exchange_type='fanout')

                current_app.logger.info("influx-writer started")
                try:
                    self.incoming_ch.start_consuming()
                except pika.exceptions.ConnectionClosed:
                    current_app.logger.warn("Connection to rabbitmq closed. Re-opening.", exc_info=True)
                    self.connection = None
                    continue

                self.connection.close()
Example #10
def create_incremental(location, threads, dump_id):
    app = create_app()
    with app.app_context():
        from listenbrainz.webserver.timescale_connection import _ts as ls
        if dump_id is None:
            end_time = datetime.now()
            dump_id = db_dump.add_dump_entry(int(end_time.strftime('%s')))
        else:
            dump_entry = db_dump.get_dump_entry(dump_id)
            if dump_entry is None:
                current_app.logger.error("No dump with ID %d found, exiting!",
                                         dump_id)
                sys.exit(-1)
            end_time = dump_entry['created']

        prev_dump_entry = db_dump.get_dump_entry(dump_id - 1)
        if prev_dump_entry is None:  # incremental dumps must have a previous dump in the series
            current_app.logger.error(
                "Invalid dump ID %d, could not find previous dump", dump_id)
            sys.exit(-1)
        start_time = prev_dump_entry['created']
        current_app.logger.info("Dumping data from %s to %s", start_time,
                                end_time)

        dump_name = 'listenbrainz-dump-{dump_id}-{time}-incremental'.format(
            dump_id=dump_id, time=end_time.strftime('%Y%m%d-%H%M%S'))
        dump_path = os.path.join(location, dump_name)
        create_path(dump_path)
        listens_dump_file = ls.dump_listens(dump_path,
                                            dump_id=dump_id,
                                            start_time=start_time,
                                            end_time=end_time,
                                            threads=threads)
        spark_dump_file = 'listenbrainz-listens-dump-{dump_id}-{time}-spark-incremental.tar.xz'.format(
            dump_id=dump_id, time=end_time.strftime('%Y%m%d-%H%M%S'))
        spark_dump_path = os.path.join(location, dump_path, spark_dump_file)
        transmogrify_dump_file_to_spark_import_format(listens_dump_file,
                                                      spark_dump_path, threads)
        try:
            write_hashes(dump_path)
        except IOError as e:
            current_app.logger.error('Unable to create hash files! Error: %s',
                                     str(e),
                                     exc_info=True)
            sys.exit(-1)

        try:
            if not sanity_check_dumps(dump_path, 6):
                return sys.exit(-1)
        except OSError as e:
            sys.exit(-1)

        # if in production, send an email to interested people for observability
        send_dump_creation_notification(dump_name, 'incremental')

        # Write the DUMP_ID file so that the FTP sync scripts can be more robust
        with open(os.path.join(dump_path, "DUMP_ID.txt"), "w") as f:
            f.write("%s %s incremental\n" %
                    (end_time.strftime('%Y%m%d-%H%M%S'), dump_id))

        current_app.logger.info('Dumps created and hashes written at %s' %
                                dump_path)
        sys.exit(0)
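# An illustrative version of sanity_check_dumps() as used above, assuming it confirms that the
# dump directory contains the expected number of files and that none of them are empty; the
# real helper may perform additional checks.
import os

def sanity_check_dumps_sketch(dump_path, expected_count):
    files = [name for name in os.listdir(dump_path)
             if os.path.isfile(os.path.join(dump_path, name))]
    if len(files) != expected_count:
        return False
    return all(os.path.getsize(os.path.join(dump_path, name)) > 0 for name in files)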
Example #12
 def setUp(self):
     super().setUp()
     self.tempdir = tempfile.mkdtemp()
     self.app = create_app()
 def setUp(self):
     super().setUp()
     self.tempdir = tempfile.mkdtemp()
     self.app = create_app()
 def setUp(self):
     self.app = create_app()
def calculate_user_similarity():
    application = webserver.create_app()
    with application.app_context():
        user_similarity.calculate_similar_users()
Example #16
 def create_app(self):
     app = create_app()
     app.config['TESTING'] = True
     return app
from listenbrainz import db
from brainzutils import musicbrainz_db
from listenbrainz.webserver import create_app
from listenbrainz.webserver.views.api_tools import publish_data_to_queue
from listenbrainz.listenstore import InfluxListenStore
from listenbrainz.webserver.influx_connection import init_influx_connection
from werkzeug.exceptions import NotFound

import listenbrainz.db.user as db_user
import logging
import sqlalchemy

app = create_app()
influx = init_influx_connection(logging, {
    'REDIS_HOST': app.config['REDIS_HOST'],
    'REDIS_PORT': app.config['REDIS_PORT'],
    'REDIS_NAMESPACE': app.config['REDIS_NAMESPACE'],
    'INFLUX_HOST': app.config['INFLUX_HOST'],
    'INFLUX_PORT': app.config['INFLUX_PORT'],
    'INFLUX_DB_NAME': app.config['INFLUX_DB_NAME'],
})


def update_row_ids_for_exceptions():
    with musicbrainz_db.engine.connect() as mb_connection:
        with db.engine.connect() as connection:
            # 2106 - Fée Deuspi
            result = mb_connection.execute(sqlalchemy.text("""
                SELECT id
                  FROM editor
                 WHERE name = 'Fée Deuspi'
 def setUp(self):
     super(BigQueryJobRunnerTestCase, self).setUp()
     self.app = create_app() # create a flask app for config purposes
     self.sc = BigQueryJobRunner()
     self.sc.bigquery = MagicMock()
     self.user = db_user.get_or_create(21, 'stats_calculator_test_user')
Example #19
 def __init__(self, user_name):
     self.user_name = user_name
     self.max_time = datetime.now()
     self.app = create_app()
Example #20
 def __init__(self):
     self.app = create_app()
 def __init__(self):
     self.app = create_app() # creating a flask app for config values and logging to Sentry
Example #22
def update_user_emails():
    from listenbrainz.webserver.login import copy_files_from_mb_to_lb
    application = webserver.create_app()
    with application.app_context():
        copy_files_from_mb_to_lb.copy_emails()
Example #23
def check_ftp_dump_ages():
    """
        Fetch the FTP dir listing of the full and incremental dumps and check their ages. Send mail
        to the observability list in case the dumps are too old.
    """

    msg = ""
    try:
        latest_file = _fetch_latest_file_info_from_ftp_dir(
            MAIN_FTP_SERVER_URL, '/pub/musicbrainz/listenbrainz/fullexport')
        id, dt = _parse_ftp_name_with_id(latest_file)
        age = datetime.now() - dt
        if age > timedelta(days=FULLEXPORT_MAX_AGE):
            msg = "Full dump %d is more than %d days old: %s\n" % (
                id, FULLEXPORT_MAX_AGE, str(age))
            print(msg, end="")
        else:
            print("Full dump %s is %s old, good!" % (id, str(age)))
    except Exception as err:
        msg = "Cannot fetch full dump age: %s\n\n%s" % (str(err),
                                                        traceback.format_exc())

    try:
        latest_file = _fetch_latest_file_info_from_ftp_dir(
            MAIN_FTP_SERVER_URL, '/pub/musicbrainz/listenbrainz/incremental')
        id, dt = _parse_ftp_name_with_id(latest_file)
        age = datetime.now() - dt
        if age > timedelta(hours=INCREMENTAL_MAX_AGE):
            msg = "Incremental dump %s is more than %s hours old: %s\n" % (
                id, INCREMENTAL_MAX_AGE, str(age))
            print(msg, end="")
        else:
            print("Incremental dump %s is %s old, good!" % (id, str(age)))
    except Exception as err:
        msg = "Cannot fetch incremental dump age: %s\n\n%s" % (
            str(err), traceback.format_exc())

    try:
        latest_file = _fetch_latest_file_info_from_ftp_dir(
            MAIN_FTP_SERVER_URL, '/pub/musicbrainz/listenbrainz/spark')
        id, dt = _parse_ftp_name_without_id(latest_file)
        age = datetime.now() - dt
        if age > timedelta(days=FEEDBACK_MAX_AGE):
            msg = "Feedback dump %s is more than %s days old: %s\n" % (
                id, FEEDBACK_MAX_AGE, str(age))
            print(msg, end="")
        else:
            print("Feedback dump %s is %s old, good!" % (id, str(age)))
    except Exception as err:
        msg = "Cannot fetch feedback dump age: %s\n\n%s" % (
            str(err), traceback.format_exc())

    app = create_app()
    with app.app_context():
        if not current_app.config['TESTING'] and msg:
            send_mail(subject="ListenBrainz outdated dumps!",
                      text=render_template('emails/data_dump_outdated.txt',
                                           msg=msg),
                      recipients=['*****@*****.**'],
                      from_name='ListenBrainz',
                      from_addr='noreply@' +
                      current_app.config['MAIL_FROM_DOMAIN'])
        elif msg:
            print(msg)
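# The dump directory names built elsewhere in this file follow the pattern
# 'listenbrainz-dump-<id>-<YYYYMMDD-HHMMSS>-full' (or '-incremental'). A hypothetical parser
# along the lines of _parse_ftp_name_with_id(); the real helper may differ.
from datetime import datetime

def _parse_ftp_name_with_id_sketch(name):
    parts = name.split('-')
    dump_id = int(parts[2])
    created = datetime.strptime(parts[3] + '-' + parts[4], '%Y%m%d-%H%M%S')
    return dump_id, created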
 def setUp(self):
     DatabaseTestCase.setUp(self)
     TimescaleTestCase.setUp(self)
     self.log = logging.getLogger(__name__)
     self.app = create_app()
     self.logstore = TimescaleListenStore(self.log)
Example #25
from listenbrainz.webserver import create_app

application = create_app()
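# This module-level `application` object is a WSGI entry point; a server such as uWSGI or
# gunicorn would typically be pointed at it, e.g. (module path is illustrative only):
#   gunicorn "listenbrainz.wsgi:application"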
    def __init__(self):
        self.app = create_app(debug=True) # creating a flask app for config values

        self.log = logging.getLogger(__name__)
        logging.basicConfig()
        self.log.setLevel(logging.INFO)
Example #27
 def setUp(self):
     self.app = create_app(debug=True) # create an app for config value access
def create_full(location, threads, dump_id, last_dump_id):
    """ Create a ListenBrainz data dump which includes a private dump, a statistics dump
        and a dump of the actual listens from InfluxDB

        Args:
            location (str): path to the directory where the dump should be made
            threads (int): the number of threads to use during compression
            dump_id (int): the ID of the ListenBrainz data dump
            last_dump_id (bool): flag indicating whether to create a full dump from the last entry in the dump table
    """
    app = create_app()
    with app.app_context():
        ls = init_influx_connection(
            current_app.logger, {
                'REDIS_HOST': current_app.config['REDIS_HOST'],
                'REDIS_PORT': current_app.config['REDIS_PORT'],
                'REDIS_NAMESPACE': current_app.config['REDIS_NAMESPACE'],
                'INFLUX_HOST': current_app.config['INFLUX_HOST'],
                'INFLUX_PORT': current_app.config['INFLUX_PORT'],
                'INFLUX_DB_NAME': current_app.config['INFLUX_DB_NAME'],
            })

        if last_dump_id:
            all_dumps = db_dump.get_dump_entries()
            if len(all_dumps) == 0:
                current_app.logger.error(
                    "Cannot create full dump with last dump's ID, no dump exists!"
                )
                sys.exit(-1)
            dump_id = all_dumps[0]['id']

        if dump_id is None:
            end_time = datetime.now()
            dump_id = db_dump.add_dump_entry(int(end_time.strftime('%s')))
        else:
            dump_entry = db_dump.get_dump_entry(dump_id)
            if dump_entry is None:
                current_app.logger.error("No dump with ID %d found", dump_id)
                sys.exit(-1)
            end_time = dump_entry['created']

        dump_path = os.path.join(
            location, 'listenbrainz-dump-{dump_id}-{time}-full'.format(
                dump_id=dump_id, time=end_time.strftime('%Y%m%d-%H%M%S')))
        create_path(dump_path)
        db_dump.dump_postgres_db(dump_path, end_time, threads)
        ls.dump_listens(dump_path,
                        dump_id=dump_id,
                        end_time=end_time,
                        threads=threads,
                        spark_format=False)
        ls.dump_listens(dump_path,
                        dump_id=dump_id,
                        end_time=end_time,
                        threads=threads,
                        spark_format=True)
        try:
            write_hashes(dump_path)
        except IOError as e:
            current_app.logger.error('Unable to create hash files! Error: %s',
                                     str(e),
                                     exc_info=True)
            return
        current_app.logger.info('Dumps created and hashes written at %s' %
                                dump_path)
Example #29
def init_ts_db(force, create_db):
    """Initializes database.
    This process involves several steps:
    1. Table structure is created.
    2. Indexes are created.
    3. Views are created
    """
    from listenbrainz import config
    ts.init_db_connection(config.TIMESCALE_ADMIN_URI)
    if force:
        res = ts.run_sql_script_without_transaction(
            os.path.join(TIMESCALE_SQL_DIR, 'drop_db.sql'))
        if not res:
            raise Exception(
                'Failed to drop existing database and user! Exit code: %i' %
                res)

    if create_db or force:
        print('TS: Creating user and a database...')
        retries = 0
        while True:
            try:
                res = ts.run_sql_script_without_transaction(
                    os.path.join(TIMESCALE_SQL_DIR, 'create_db.sql'))
                break
            except sqlalchemy.exc.OperationalError:
                print(
                    "Trapped template1 access error, FFS! Sleeping, trying again."
                )
                retries += 1
                if retries == 5:
                    raise
                sleep(1)
                continue

        if not res:
            raise Exception(
                'Failed to create new database and user! Exit code: %i' % res)

        ts.init_db_connection(config.TIMESCALE_ADMIN_LB_URI)
        print('TS: Creating database extensions...')
        res = ts.run_sql_script_without_transaction(
            os.path.join(TIMESCALE_SQL_DIR, 'create_extensions.sql'))
    # Don't raise an exception if the extension already exists

    ts.init_db_connection(config.SQLALCHEMY_TIMESCALE_URI)
    application = webserver.create_app()
    with application.app_context():
        print('TS: Creating Schemas...')
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR,
                                       'create_schemas.sql'))

        print('TS: Creating Types...')
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR, 'create_types.sql'))

        print('TS: Creating tables...')
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR, 'create_tables.sql'))

        print('TS: Creating Functions...')
        ts.run_sql_script(
            os.path.join(TIMESCALE_SQL_DIR, 'create_functions.sql'))

        print('TS: Creating views...')
        ts.run_sql_script_without_transaction(
            os.path.join(TIMESCALE_SQL_DIR, 'create_views.sql'))

        print('TS: Creating indexes...')
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR,
                                       'create_indexes.sql'))
        ts.create_view_indexes()

        print('TS: Creating Primary and Foreign Keys...')
        ts.run_sql_script(
            os.path.join(TIMESCALE_SQL_DIR, 'create_primary_keys.sql'))
        ts.run_sql_script(
            os.path.join(TIMESCALE_SQL_DIR, 'create_foreign_keys.sql'))

        print("Done!")
Example #30
def update_user_listen_data():
    """ Scans listen table and update listen metadata for all users """
    application = webserver.create_app()
    with application.app_context():
        ts_update_user_listen_data()
Example #31
def set_rate_limits(per_token_limit, per_ip_limit, window_size):
    from brainzutils.ratelimit import set_rate_limits
    application = webserver.create_app()
    with application.app_context():
        set_rate_limits(per_token_limit, per_ip_limit, window_size)
Example #32
def delete_pending_listens():
    """ Complete all pending listen deletes since last cron run """
    application = webserver.create_app()
    with application.app_context():
        ts_delete_listens()
def create_full(location, threads, dump_id, do_listen_dump: bool,
                do_spark_dump: bool, do_db_dump: bool):
    """ Create a ListenBrainz data dump which includes a private dump, a statistics dump
        and a dump of the actual listens from the listenstore.

        Args:
            location (str): path to the directory where the dump should be made
            threads (int): the number of threads to use during compression
            dump_id (int): the ID of the ListenBrainz data dump
            do_listen_dump: If True, make a listens dump
            do_spark_dump: If True, make a spark listens dump
            do_db_dump: If True, make a public/private postgres/timescale dump
    """
    app = create_app()
    with app.app_context():
        ls = DumpListenStore(app)
        if dump_id is None:
            end_time = datetime.now()
            dump_id = db_dump.add_dump_entry(int(end_time.strftime('%s')))
        else:
            dump_entry = db_dump.get_dump_entry(dump_id)
            if dump_entry is None:
                current_app.logger.error("No dump with ID %d found", dump_id)
                sys.exit(-1)
            end_time = dump_entry['created']

        ts = end_time.strftime('%Y%m%d-%H%M%S')
        dump_name = 'listenbrainz-dump-{dump_id}-{time}-full'.format(
            dump_id=dump_id, time=ts)
        dump_path = os.path.join(location, dump_name)
        create_path(dump_path)

        expected_num_dumps = 0
        if do_db_dump:
            db_dump.dump_postgres_db(dump_path, end_time, threads)
            expected_num_dumps += 4
        if do_listen_dump:
            ls.dump_listens(dump_path,
                            dump_id=dump_id,
                            end_time=end_time,
                            threads=threads)
            expected_num_dumps += 1
        if do_spark_dump:
            ls.dump_listens_for_spark(dump_path,
                                      dump_id=dump_id,
                                      dump_type="full",
                                      end_time=end_time)
            expected_num_dumps += 1

        try:
            write_hashes(dump_path)
        except IOError as e:
            current_app.logger.error('Unable to create hash files! Error: %s',
                                     str(e),
                                     exc_info=True)
            sys.exit(-1)

        try:
            # each dump produces three files: the archive itself plus its md5 and sha256 hashes
            expected_num_dump_files = expected_num_dumps * 3
            if not sanity_check_dumps(dump_path, expected_num_dump_files):
                return sys.exit(-1)
        except OSError:
            sys.exit(-1)

        current_app.logger.info('Dumps created and hashes written at %s' %
                                dump_path)

        # Write the DUMP_ID file so that the FTP sync scripts can be more robust
        with open(os.path.join(dump_path, "DUMP_ID.txt"), "w") as f:
            f.write("%s %s full\n" % (ts, dump_id))

        # if in production, send an email to interested people for observability
        send_dump_creation_notification(dump_name, 'fullexport')

        sys.exit(0)
Example #34
def delete_listens_and_update_metadata():
    """ Complete all pending listen deletes and also run update script for
    updating listen metadata since last cron run """
    application = webserver.create_app()
    with application.app_context():
        ts_delete_listens_and_update_user_listen_data()
Example #35
    def start(self):
        app = create_app()
        with app.app_context():
            current_app.logger.info("timescale-writer init")
            self._verify_hosts_in_config()

            if "SQLALCHEMY_TIMESCALE_URI" not in current_app.config:
                current_app.logger.critical(
                    "Timescale service not defined. Sleeping {0} seconds and exiting."
                    .format(self.ERROR_RETRY_DELAY))
                sleep(self.ERROR_RETRY_DELAY)
                sys.exit(-1)

            try:
                while True:
                    try:
                        self.ls = TimescaleListenStore(
                            {
                                'REDIS_HOST':
                                current_app.config['REDIS_HOST'],
                                'REDIS_PORT':
                                current_app.config['REDIS_PORT'],
                                'REDIS_NAMESPACE':
                                current_app.config['REDIS_NAMESPACE'],
                                'SQLALCHEMY_TIMESCALE_URI':
                                current_app.config['SQLALCHEMY_TIMESCALE_URI']
                            },
                            logger=current_app.logger)
                        break
                    except Exception as err:
                        current_app.logger.error(
                            "Cannot connect to timescale: %s. Retrying in 2 seconds and trying again."
                            % str(err),
                            exc_info=True)
                        sleep(self.ERROR_RETRY_DELAY)

                while True:
                    try:
                        self.redis = Redis(
                            host=current_app.config['REDIS_HOST'],
                            port=current_app.config['REDIS_PORT'],
                            decode_responses=True)
                        self.redis.ping()
                        self.redis_listenstore = RedisListenStore(
                            current_app.logger, current_app.config)
                        break
                    except Exception as err:
                        current_app.logger.error(
                            "Cannot connect to redis: %s. Retrying in 2 seconds and trying again."
                            % str(err),
                            exc_info=True)
                        sleep(self.ERROR_RETRY_DELAY)

                while True:
                    self.connect_to_rabbitmq()
                    self.incoming_ch = self.connection.channel()
                    self.incoming_ch.exchange_declare(
                        exchange=current_app.config['INCOMING_EXCHANGE'],
                        exchange_type='fanout')
                    self.incoming_ch.queue_declare(
                        current_app.config['INCOMING_QUEUE'], durable=True)
                    self.incoming_ch.queue_bind(
                        exchange=current_app.config['INCOMING_EXCHANGE'],
                        queue=current_app.config['INCOMING_QUEUE'])
                    self.incoming_ch.basic_consume(
                        queue=current_app.config['INCOMING_QUEUE'],
                        on_message_callback=lambda ch, method, properties,
                        body: self.static_callback(
                            ch, method, properties, body, obj=self))

                    self.unique_ch = self.connection.channel()
                    self.unique_ch.exchange_declare(
                        exchange=current_app.config['UNIQUE_EXCHANGE'],
                        exchange_type='fanout')

                    try:
                        self.incoming_ch.start_consuming()
                    except pika.exceptions.ConnectionClosed:
                        current_app.logger.warn(
                            "Connection to rabbitmq closed. Re-opening.",
                            exc_info=True)
                        self.connection = None
                        continue

                    self.connection.close()

            except Exception:
                current_app.logger.error("failed to start timescale loop:",
                                         exc_info=True)
Example #36
def add_missing_to_listen_users_metadata():
    application = webserver.create_app()
    with application.app_context():
        ts_add_missing_to_listen_users_metadata()
Example #37
def create_full(location, threads, dump_id, last_dump_id):
    """ Create a ListenBrainz data dump which includes a private dump, a statistics dump
        and a dump of the actual listens from the listenstore

        Args:
            location (str): path to the directory where the dump should be made
            threads (int): the number of threads to use during compression
            dump_id (int): the ID of the ListenBrainz data dump
            last_dump_id (bool): flag indicating whether to create a full dump from the last entry in the dump table
    """
    app = create_app()
    with app.app_context():
        from listenbrainz.webserver.timescale_connection import _ts as ls
        if last_dump_id:
            all_dumps = db_dump.get_dump_entries()
            if len(all_dumps) == 0:
                current_app.logger.error(
                    "Cannot create full dump with last dump's ID, no dump exists!"
                )
                sys.exit(-1)
            dump_id = all_dumps[0]['id']

        if dump_id is None:
            end_time = datetime.now()
            dump_id = db_dump.add_dump_entry(int(end_time.strftime('%s')))
        else:
            dump_entry = db_dump.get_dump_entry(dump_id)
            if dump_entry is None:
                current_app.logger.error("No dump with ID %d found", dump_id)
                sys.exit(-1)
            end_time = dump_entry['created']

        ts = end_time.strftime('%Y%m%d-%H%M%S')
        dump_name = 'listenbrainz-dump-{dump_id}-{time}-full'.format(
            dump_id=dump_id, time=ts)
        dump_path = os.path.join(location, dump_name)
        create_path(dump_path)
        db_dump.dump_postgres_db(dump_path, end_time, threads)

        listens_dump_file = ls.dump_listens(dump_path,
                                            dump_id=dump_id,
                                            end_time=end_time,
                                            threads=threads)
        spark_dump_file = 'listenbrainz-listens-dump-{dump_id}-{time}-spark-full.tar.xz'.format(
            dump_id=dump_id, time=ts)
        spark_dump_path = os.path.join(location, dump_path, spark_dump_file)
        transmogrify_dump_file_to_spark_import_format(listens_dump_file,
                                                      spark_dump_path, threads)

        try:
            write_hashes(dump_path)
        except IOError as e:
            current_app.logger.error('Unable to create hash files! Error: %s',
                                     str(e),
                                     exc_info=True)
            sys.exit(-1)

        try:
            if not sanity_check_dumps(dump_path, 12):
                return sys.exit(-1)
        except OSError as e:
            sys.exit(-1)

        # if in production, send an email to interested people for observability
        send_dump_creation_notification(dump_name, 'fullexport')

        current_app.logger.info('Dumps created and hashes written at %s' %
                                dump_path)

        # Write the DUMP_ID file so that the FTP sync scripts can be more robust
        with open(os.path.join(dump_path, "DUMP_ID.txt"), "w") as f:
            f.write("%s %s full\n" % (ts, dump_id))

        sys.exit(0)
Example #38
def notify_yim_users():
    application = webserver.create_app()
    with application.app_context():
        from listenbrainz.db import year_in_music
        year_in_music.notify_yim_users()
Example #39
 def __init__(self):
     self.app = create_app() # creating a flask app for config values and logging to Sentry
Example #40
 def create_app(self):
     app = create_app(debug=False)
     app.config['TESTING'] = True
     return app