Example #1
import pytest

import src.utils.db_session  # module whose attribute is patched below
from src.utils.session_manager import SessionManager  # assumed import path


# A @pytest.fixture decorator is implied by the monkeypatch parameter
# (likely stripped from the excerpt).
@pytest.fixture
def db_mock(monkeypatch):
    db = SessionManager("sqlite://", {})

    def get_db_read_replica():
        return db

    monkeypatch.setattr(src.utils.db_session, "get_db_read_replica",
                        get_db_read_replica)

    return db
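A short sketch of how a test might consume this fixture is below (the test body is hypothetical; pytest injects the fixture by parameter name):

import sqlalchemy


def test_uses_read_replica(db_mock):
    # Any production code calling src.utils.db_session.get_db_read_replica()
    # now receives the in-memory SQLite SessionManager instead.
    with db_mock.scoped_session() as session:
        assert session.execute(sqlalchemy.text("SELECT 1")).scalar() == 1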
Example #2
def configure_celery(flask_app, celery, test_config=None):
    database_url = shared_config["db"]["url"]
    engine_args_literal = ast.literal_eval(
        shared_config["db"]["engine_args_literal"])
    redis_url = shared_config["redis"]["url"]

    if test_config is not None:
        if "db" in test_config:
            if "url" in test_config["db"]:
                database_url = test_config["db"]["url"]

    ipld_interval = int(
        shared_config["discprov"]["blacklist_block_indexing_interval"])
    # default is 5 seconds
    indexing_interval_sec = int(
        shared_config["discprov"]["block_processing_interval_sec"])

    # Update celery configuration
    celery.conf.update(
        imports=[
            "src.tasks.index", "src.tasks.index_blacklist",
            "src.tasks.index_plays", "src.tasks.index_metrics",
            "src.tasks.index_materialized_views",
            "src.tasks.index_network_peers", "src.tasks.index_trending",
            "src.tasks.cache_user_balance", "src.monitors.monitoring_queue",
            "src.tasks.cache_trending_playlists",
            "src.tasks.index_solana_plays", "src.tasks.index_aggregate_views"
        ],
        beat_schedule={
            "update_discovery_provider": {
                "task": "update_discovery_provider",
                "schedule": timedelta(seconds=indexing_interval_sec),
            },
            "update_ipld_blacklist": {
                "task": "update_ipld_blacklist",
                "schedule": timedelta(seconds=ipld_interval),
            },
            "update_play_count": {
                "task": "update_play_count",
                "schedule": timedelta(seconds=60)
            },
            "update_metrics": {
                "task": "update_metrics",
                "schedule": crontab(minute=0, hour="*")
            },
            "aggregate_metrics": {
                "task": "aggregate_metrics",
                "schedule": timedelta(minutes=METRICS_INTERVAL)
            },
            "synchronize_metrics": {
                "task": "synchronize_metrics",
                "schedule": crontab(minute=0, hour=1)
            },
            "update_materialized_views": {
                "task": "update_materialized_views",
                "schedule": timedelta(seconds=300)
            },
            "update_network_peers": {
                "task": "update_network_peers",
                "schedule": timedelta(seconds=30)
            },
            "index_trending": {
                "task": "index_trending",
                "schedule": crontab(minute=15, hour="*")
            },
            "update_user_balances": {
                "task": "update_user_balances",
                "schedule": timedelta(seconds=60)
            },
            "monitoring_queue": {
                "task": "monitoring_queue",
                "schedule": timedelta(seconds=60)
            },
            "cache_trending_playlists": {
                "task": "cache_trending_playlists",
                "schedule": timedelta(minutes=30)
            },
            "index_solana_plays": {
                "task": "index_solana_plays",
                "schedule": timedelta(seconds=5)
            },
            "update_aggregate_user": {
                "task": "update_aggregate_user",
                "schedule": timedelta(seconds=30)
            },
            "update_aggregate_track": {
                "task": "update_aggregate_track",
                "schedule": timedelta(seconds=30)
            },
            "update_aggregate_playlist": {
                "task": "update_aggregate_playlist",
                "schedule": timedelta(seconds=30)
            }
        },
        task_serializer="json",
        accept_content=["json"],
        broker_url=redis_url,
    )

    # Initialize DB object for celery task context
    db = SessionManager(database_url, engine_args_literal)
    logger.info('Database instance initialized!')
    # Initialize IPFS client for celery task context
    ipfs_client = IPFSClient(shared_config["ipfs"]["host"],
                             shared_config["ipfs"]["port"])

    # Initialize Redis connection
    redis_inst = redis.Redis.from_url(url=redis_url)
    # Clear existing locks used in tasks if present
    redis_inst.delete("disc_prov_lock")
    redis_inst.delete("network_peers_lock")
    redis_inst.delete("materialized_view_lock")
    redis_inst.delete("update_metrics_lock")
    redis_inst.delete("update_play_count_lock")
    redis_inst.delete("ipld_blacklist_lock")
    redis_inst.delete("update_discovery_lock")
    redis_inst.delete("aggregate_metrics_lock")
    redis_inst.delete("synchronize_metrics_lock")
    logger.info('Redis instance initialized!')

    # Initialize custom task context with database object
    class DatabaseTask(Task):
        def __init__(self, *args, **kwargs):
            self._db = db
            self._web3_provider = web3
            self._abi_values = abi_values
            self._shared_config = shared_config
            self._ipfs_client = ipfs_client
            self._redis = redis_inst
            self._eth_web3_provider = eth_web3
            self._solana_client = solana_client

        @property
        def abi_values(self):
            return self._abi_values

        @property
        def web3(self):
            return self._web3_provider

        @property
        def db(self):
            return self._db

        @property
        def shared_config(self):
            return self._shared_config

        @property
        def ipfs_client(self):
            return self._ipfs_client

        @property
        def redis(self):
            return self._redis

        @property
        def eth_web3(self):
            return self._eth_web3_provider

        @property
        def solana_client(self):
            return self._solana_client

    celery.autodiscover_tasks(["src.tasks"], "index", True)

    # Subclassing celery task with discovery provider context
    # Provided through properties defined in 'DatabaseTask'
    celery.Task = DatabaseTask

    celery.finalize()
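Because celery.Task is replaced with DatabaseTask, any task discovered under src.tasks can reach this context through self when bound. A minimal sketch (the task name is hypothetical; bind=True is standard Celery API):

import sqlalchemy


@celery.task(name="example_task", bind=True)
def example_task(self):
    # self is a DatabaseTask instance, so the injected context is
    # available through its properties.
    with self.db.scoped_session() as session:
        session.execute(sqlalchemy.text("SELECT 1"))
    self.redis.set("example_task_last_run", "ok")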
Example #3
def configure_flask(test_config, app, mode="app"):
    with app.app_context():
        app.iniconfig.read(config_files)

    # custom JSON serializer for timestamps
    class TimestampJSONEncoder(JSONEncoder):
        # pylint: disable=E0202
        def default(self, o):
            if isinstance(o, datetime.datetime):
                # ISO-8601 timestamp format
                return o.strftime("%Y-%m-%dT%H:%M:%S Z")
            return JSONEncoder.default(self, o)

    app.json_encoder = TimestampJSONEncoder

    database_url = app.config["db"]["url"]
    if test_config is not None:
        if "db" in test_config:
            if "url" in test_config["db"]:
                database_url = test_config["db"]["url"]

    # Sometimes ECS latency causes the create_database function to fail because the db connection is not ready
    # Give it more time to get set up, retrying up to 5 times
    i = 0
    while i < 5:
        try:
            # Create database if necessary
            if not database_exists(database_url):
                create_database(database_url)
            else:
                break
        except exc.OperationalError as e:
            if "could not connect to server" in str(e):
                logger.warning(
                    "DB connection isn't up yet...setting a temporary timeout and trying again"
                )
                time.sleep(10)
            else:
                raise e

        i += 1

    if test_config is not None:
        # load the test config if passed in
        app.config.update(test_config)

    app.db_session_manager = SessionManager(
        app.config["db"]["url"],
        ast.literal_eval(app.config["db"]["engine_args_literal"]),
    )

    app.db_read_replica_session_manager = SessionManager(
        app.config["db"]["url_read_replica"],
        ast.literal_eval(app.config["db"]["engine_args_literal"]),
    )

    register_exception_handlers(app)
    app.register_blueprint(queries.bp)
    app.register_blueprint(search.bp)
    app.register_blueprint(search_queries.bp)
    app.register_blueprint(notifications.bp)
    app.register_blueprint(health_check.bp)
    app.register_blueprint(block_confirmation.bp)

    app.register_blueprint(api_v1.bp)
    app.register_blueprint(api_v1.bp_full)

    return app
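Note that the format string above hard-codes a literal " Z" suffix rather than a real UTC offset. A standalone sketch of what the encoder emits for a datetime:

import datetime

dt = datetime.datetime(2021, 1, 2, 3, 4, 5)
print(dt.strftime("%Y-%m-%dT%H:%M:%S Z"))  # -> 2021-01-02T03:04:05 Z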
Example #4
def configure_flask(test_config, app, mode="app"):
    with app.app_context():
        app.iniconfig.read(config_files)

    # custom JSON serializer for timestamps
    class TimestampJSONEncoder(JSONEncoder):
        # pylint: disable=E0202
        def default(self, o):
            if isinstance(o, datetime.datetime):
                # ISO-8601 timestamp format
                return o.strftime("%Y-%m-%dT%H:%M:%S Z")
            return JSONEncoder.default(self, o)

    app.json_encoder = TimestampJSONEncoder

    database_url = app.config["db"]["url"]
    if test_config is not None:
        if "db" in test_config:
            if "url" in test_config["db"]:
                database_url = test_config["db"]["url"]

    # Sometimes ECS latency causes the create_database function to fail because the db connection is not ready
    # Give it more time to get set up, retrying up to 5 times
    i = 0
    while i < 5:
        try:
            # Create database if necessary
            if not database_exists(database_url):
                create_database(database_url)
            else:
                break
        except exc.OperationalError as e:
            if "could not connect to server" in str(e):
                logger.warning(
                    "DB connection isn't up yet...setting a teporary timeout and trying again"
                )
                time.sleep(10)
            else:
                raise e

        i += 1

    # Conditionally perform alembic database upgrade to HEAD during
    # flask initialization
    if mode == "app":
        alembic_dir = os.getcwd()
        alembic_config = alembic.config.Config(f"{alembic_dir}/alembic.ini")
        alembic_config.set_main_option("sqlalchemy.url", str(database_url))
        with helpers.cd(alembic_dir):
            alembic.command.upgrade(alembic_config, "head")

    if test_config is not None:
        # load the test config if passed in
        app.config.update(test_config)

    app.db_session_manager = SessionManager(
        app.config["db"]["url"],
        ast.literal_eval(app.config["db"]["engine_args_literal"]),
    )
    with app.db_session_manager.scoped_session() as session:
        set_search_similarity(session)

    app.db_read_replica_session_manager = SessionManager(
        app.config["db"]["url_read_replica"],
        ast.literal_eval(app.config["db"]["engine_args_literal"]),
    )
    with app.db_read_replica_session_manager.scoped_session() as session:
        set_search_similarity(session)

    exceptions.register_exception_handlers(app)
    app.register_blueprint(queries.bp)
    app.register_blueprint(trending.bp)
    app.register_blueprint(search.bp)
    app.register_blueprint(search_queries.bp)
    app.register_blueprint(notifications.bp)
    app.register_blueprint(health_check.bp)

    app.register_blueprint(api_v1.bp)
    app.register_blueprint(api_v1.bp_full)

    return app
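A minimal sketch of how this function might be invoked at app-creation time (the create_app wrapper is an assumption; the excerpt also presumes app.iniconfig was attached beforehand, e.g. by the flask-ini extension):

from flask import Flask


def create_app(test_config=None):
    app = Flask(__name__)
    # configure_flask reads the ini config, runs alembic migrations in
    # "app" mode, attaches the session managers, and registers blueprints.
    return configure_flask(test_config, app, mode="app")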
Example #5
def configure_celery(flask_app, celery, test_config=None):
    database_url = shared_config["db"]["url"]
    engine_args_literal = ast.literal_eval(
        shared_config["db"]["engine_args_literal"])
    redis_url = shared_config["redis"]["url"]

    if test_config is not None:
        if "db" in test_config:
            if "url" in test_config["db"]:
                database_url = test_config["db"]["url"]

    # Update celery configuration
    celery.conf.update(
        imports=["src.tasks.index", "src.tasks.index_blacklist",
                 "src.tasks.index_cache", "src.tasks.index_plays", "src.tasks.index_metrics"],
        beat_schedule={
            "update_discovery_provider": {
                "task": "update_discovery_provider",
                "schedule": timedelta(seconds=5),
            },
            "update_ipld_blacklist": {
                "task": "update_ipld_blacklist",
                "schedule": timedelta(seconds=60),
            },
            "update_cache": {
                "task": "update_discovery_cache",
                "schedule": timedelta(seconds=60)
            },
            "update_play_count": {
                "task": "update_play_count",
                "schedule": timedelta(seconds=10)
            },
            "update_metrics": {
                "task": "update_metrics",
                "schedule": crontab(minute=0, hour="*")
            }
        },
        task_serializer="json",
        accept_content=["json"],
        broker_url=redis_url,
    )

    # Initialize DB object for celery task context
    db = SessionManager(database_url, engine_args_literal)
    logger.info('Database instance initialized!')

    # Initialize IPFS client for celery task context
    gateway_addrs = shared_config["ipfs"]["gateway_hosts"].split(',')
    gateway_addrs.append(
        shared_config["discprov"]["user_metadata_service_url"])
    logger.warning(f"__init__.py | {gateway_addrs}")
    ipfs_client = IPFSClient(
        shared_config["ipfs"]["host"], shared_config["ipfs"]["port"], gateway_addrs
    )

    # Initialize Redis connection
    redis_inst = redis.Redis.from_url(url=redis_url)

    # Clear existing lock if present
    redis_inst.delete("disc_prov_lock")
    logger.info('Redis instance initialized!')

    # Initialize custom task context with database object
    class DatabaseTask(Task):
        def __init__(self, *args, **kwargs):
            self._db = db
            self._web3_provider = web3
            self._abi_values = abi_values
            self._shared_config = shared_config
            self._ipfs_client = ipfs_client
            self._redis = redis_inst

        @property
        def abi_values(self):
            return self._abi_values

        @property
        def web3(self):
            return self._web3_provider

        @property
        def db(self):
            return self._db

        @property
        def shared_config(self):
            return self._shared_config

        @property
        def ipfs_client(self):
            return self._ipfs_client

        @property
        def redis(self):
            return self._redis

    celery.autodiscover_tasks(["src.tasks"], "index", True)

    # Subclassing celery task with discovery provider context
    # Provided through properties defined in 'DatabaseTask'
    celery.Task = DatabaseTask

    celery.finalize()
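The engine_args_literal pattern seen throughout these examples stores SQLAlchemy engine keyword arguments as a Python-literal string in config; ast.literal_eval parses it safely, without executing code. A quick sketch with an assumed example value:

import ast

engine_args_literal = "{'pool_size': 10, 'max_overflow': 0}"  # assumed value
engine_args = ast.literal_eval(engine_args_literal)
print(engine_args["pool_size"])  # -> 10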
Example #6
def configure_celery(celery, test_config=None):
    database_url = shared_config["db"]["url"]
    redis_url = shared_config["redis"]["url"]

    if test_config is not None:
        if "db" in test_config:
            if "url" in test_config["db"]:
                database_url = test_config["db"]["url"]

    ipld_interval = int(
        shared_config["discprov"]["blacklist_block_indexing_interval"])
    # default is 5 seconds
    indexing_interval_sec = int(
        shared_config["discprov"]["block_processing_interval_sec"])

    # Update celery configuration
    celery.conf.update(
        imports=[
            "src.tasks.index",
            "src.tasks.index_blacklist",
            "src.tasks.index_metrics",
            "src.tasks.index_materialized_views",
            "src.tasks.aggregates.index_aggregate_plays",
            "src.tasks.index_aggregate_monthly_plays",
            "src.tasks.index_hourly_play_counts",
            "src.tasks.vacuum_db",
            "src.tasks.index_network_peers",
            "src.tasks.index_trending",
            "src.tasks.cache_user_balance",
            "src.monitors.monitoring_queue",
            "src.tasks.cache_trending_playlists",
            "src.tasks.index_solana_plays",
            "src.tasks.index_aggregate_views",
            "src.tasks.index_aggregate_user",
            "src.tasks.aggregates.index_aggregate_track",
            "src.tasks.index_challenges",
            "src.tasks.index_user_bank",
            "src.tasks.index_eth",
            "src.tasks.index_oracles",
            "src.tasks.index_rewards_manager",
            "src.tasks.index_related_artists",
            "src.tasks.calculate_trending_challenges",
            "src.tasks.index_listen_count_milestones",
            "src.tasks.user_listening_history.index_user_listening_history",
            "src.tasks.prune_plays",
            "src.tasks.index_spl_token",
            "src.tasks.index_solana_user_data",
            "src.tasks.index_aggregate_tips",
            "src.tasks.index_reactions",
        ],
        beat_schedule={
            "update_discovery_provider": {
                "task": "update_discovery_provider",
                "schedule": timedelta(seconds=indexing_interval_sec),
            },
            "update_ipld_blacklist": {
                "task": "update_ipld_blacklist",
                "schedule": timedelta(seconds=ipld_interval),
            },
            "update_metrics": {
                "task": "update_metrics",
                "schedule": crontab(minute=0, hour="*"),
            },
            "aggregate_metrics": {
                "task": "aggregate_metrics",
                "schedule": timedelta(minutes=METRICS_INTERVAL),
            },
            "synchronize_metrics": {
                "task": "synchronize_metrics",
                "schedule": timedelta(minutes=SYNCHRONIZE_METRICS_INTERVAL),
            },
            "update_materialized_views": {
                "task": "update_materialized_views",
                "schedule": timedelta(seconds=300),
            },
            "update_aggregate_plays": {
                "task": "update_aggregate_plays",
                "schedule": timedelta(seconds=15),
            },
            "index_hourly_play_counts": {
                "task": "index_hourly_play_counts",
                "schedule": timedelta(seconds=30),
            },
            "vacuum_db": {
                "task": "vacuum_db",
                "schedule": timedelta(days=1),
            },
            "update_network_peers": {
                "task": "update_network_peers",
                "schedule": timedelta(seconds=30),
            },
            "index_trending": {
                "task": "index_trending",
                "schedule": timedelta(seconds=10),
            },
            "update_user_balances": {
                "task": "update_user_balances",
                "schedule": timedelta(seconds=60),
            },
            "monitoring_queue": {
                "task": "monitoring_queue",
                "schedule": timedelta(seconds=60),
            },
            "cache_trending_playlists": {
                "task": "cache_trending_playlists",
                "schedule": timedelta(minutes=30),
            },
            "index_solana_plays": {
                "task": "index_solana_plays",
                "schedule": timedelta(seconds=5),
            },
            "update_aggregate_user": {
                "task": "update_aggregate_user",
                "schedule": timedelta(seconds=30),
            },
            "update_aggregate_track": {
                "task": "update_aggregate_track",
                "schedule": timedelta(seconds=30),
            },
            "update_aggregate_playlist": {
                "task": "update_aggregate_playlist",
                "schedule": timedelta(seconds=30),
            },
            "index_user_bank": {
                "task": "index_user_bank",
                "schedule": timedelta(seconds=5),
            },
            "index_challenges": {
                "task": "index_challenges",
                "schedule": timedelta(seconds=5),
            },
            "index_eth": {
                "task": "index_eth",
                "schedule": timedelta(seconds=10),
            },
            "index_oracles": {
                "task": "index_oracles",
                "schedule": timedelta(minutes=5),
            },
            "index_rewards_manager": {
                "task": "index_rewards_manager",
                "schedule": timedelta(seconds=5),
            },
            "index_related_artists": {
                "task": "index_related_artists",
                "schedule": timedelta(seconds=60),
            },
            "index_listen_count_milestones": {
                "task": "index_listen_count_milestones",
                "schedule": timedelta(seconds=5),
            },
            "index_user_listening_history": {
                "task": "index_user_listening_history",
                "schedule": timedelta(seconds=5),
            },
            "index_aggregate_monthly_plays": {
                "task": "index_aggregate_monthly_plays",
                "schedule": crontab(minute=0, hour=0),  # daily at midnight
            },
            "prune_plays": {
                "task": "prune_plays",
                "schedule": crontab(
                    minute="*/15",
                    hour="14, 15",
                ),  # 8x a day during non-peak hours
            },
            "index_spl_token": {
                "task": "index_spl_token",
                "schedule": timedelta(seconds=5),
            },
            "index_aggregate_tips": {
                "task": "index_aggregate_tips",
                "schedule": timedelta(seconds=5),
            },
            "index_reactions": {
                "task": "index_reactions",
                "schedule": timedelta(seconds=5),
            }
            # UNCOMMENT BELOW FOR MIGRATION DEV WORK
            # "index_solana_user_data": {
            #     "task": "index_solana_user_data",
            #     "schedule": timedelta(seconds=5),
            # },
        },
        task_serializer="json",
        accept_content=["json"],
        broker_url=redis_url,
    )

    # Initialize DB object for celery task context
    db = SessionManager(
        database_url,
        ast.literal_eval(shared_config["db"]["engine_args_literal"]))
    logger.info("Database instance initialized!")

    # Initialize Redis connection
    redis_inst = redis.Redis.from_url(url=redis_url)

    # Initialize CIDMetadataClient for celery task context
    cid_metadata_client = CIDMetadataClient(
        eth_web3,
        shared_config,
        redis_inst,
        eth_abi_values,
    )

    # Clear last scanned redis block on startup
    delete_last_scanned_eth_block_redis(redis_inst)

    # Initialize Anchor Indexer
    anchor_program_indexer = AnchorProgramIndexer(
        shared_config["solana"]["anchor_data_program_id"],
        shared_config["solana"]["anchor_admin_storage_public_key"],
        "index_solana_user_data",
        redis_inst,
        db,
        solana_client_manager,
        cid_metadata_client,
    )

    # Clear existing locks used in tasks if present
    redis_inst.delete("disc_prov_lock")
    redis_inst.delete("network_peers_lock")
    redis_inst.delete("materialized_view_lock")
    redis_inst.delete("update_metrics_lock")
    redis_inst.delete("update_play_count_lock")
    redis_inst.delete("index_hourly_play_counts_lock")
    redis_inst.delete("ipld_blacklist_lock")
    redis_inst.delete("update_discovery_lock")
    redis_inst.delete("aggregate_metrics_lock")
    redis_inst.delete("synchronize_metrics_lock")
    redis_inst.delete("solana_plays_lock")
    redis_inst.delete("index_challenges_lock")
    redis_inst.delete("user_bank_lock")
    redis_inst.delete("index_eth_lock")
    redis_inst.delete("index_oracles_lock")
    redis_inst.delete("solana_rewards_manager_lock")
    redis_inst.delete("calculate_trending_challenges_lock")
    redis_inst.delete("index_user_listening_history_lock")
    redis_inst.delete("prune_plays_lock")
    redis_inst.delete("update_aggregate_table:aggregate_user_tips")
    redis_inst.delete(INDEX_REACTIONS_LOCK)

    logger.info("Redis instance initialized!")

    # Initialize custom task context with database object
    class WrappedDatabaseTask(DatabaseTask):
        def __init__(self, *args, **kwargs):
            DatabaseTask.__init__(
                self,
                db=db,
                web3=web3,
                abi_values=abi_values,
                eth_abi_values=eth_abi_values,
                shared_config=shared_config,
                cid_metadata_client=cid_metadata_client,
                redis=redis_inst,
                eth_web3_provider=eth_web3,
                solana_client_manager=solana_client_manager,
                challenge_event_bus=setup_challenge_bus(),
                anchor_program_indexer=anchor_program_indexer,
            )

    celery.autodiscover_tasks(["src.tasks"], "index", True)

    # Subclassing celery task with discovery provider context
    # Provided through properties defined in 'DatabaseTask'
    celery.Task = WrappedDatabaseTask

    celery.finalize()
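Unlike Examples #2 and #5, DatabaseTask is not defined inline here; it is imported from a shared module and receives its context as keyword arguments. A minimal sketch of what that base class plausibly looks like, following the property pattern of the earlier examples (an assumption; the real class lives elsewhere in the codebase):

from celery import Task


class DatabaseTask(Task):
    def __init__(self, *args, db=None, redis=None, shared_config=None, **kwargs):
        # Context injected once at configuration time, exposed via properties.
        self._db = db
        self._redis = redis
        self._shared_config = shared_config

    @property
    def db(self):
        return self._db

    @property
    def redis(self):
        return self._redis

    @property
    def shared_config(self):
        return self._shared_config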
Example #7
def test_migration_idempotency():
    """
    Test the migrations are idempotent -- we can re-run them and they
    succeed. This is a useful test in making sure that during service upgrade
    a service provider may retry a migration multiple times.

    Because not all migrations are historically idempotent, this checking begins at
    the migration following START_MIGRATION
    """

    # Drop DB, ensuring migration performed at start
    if database_exists(DB_URL):
        drop_database(DB_URL)

    create_database(DB_URL)
    session_manager = SessionManager(DB_URL, {})

    # Run db migrations because the db gets dropped at the start of the tests
    alembic_dir = os.getcwd()
    alembic_config = alembic.config.Config(f"{alembic_dir}/alembic.ini")
    alembic_config.set_main_option("sqlalchemy.url", str(DB_URL))
    alembic_config.set_main_option("mode", "test")

    buf = steal_stdout()
    alembic_config.stdout = buf

    # Alembic commands print out instead of returning...
    alembic.command.history(alembic_config)
    # Rows of this output look like
    # b3084b7bc025 -> 5add54e23282, add stems support
    versions = buf.getvalue().decode("utf-8")

    def get_version(line):
        m = re.search("(?P<old_version>.{12}) -> (?P<new_version>.{12}).*",
                      line.strip())
        if m:
            return m.group("new_version")
        return None

    versions = list(filter(None, map(get_version, versions.split("\n"))))
    # Ordered (chronological) list of all alembic revisions
    versions_in_chronological_order = list(reversed(versions))

    alembic_config.stdout = sys.stdout

    # Find migration to start at
    start_index = 0
    for i in range(len(versions_in_chronological_order)):
        if versions_in_chronological_order[i] == START_MIGRATION:
            start_index = i
            break

    # Apply the migrations one by one, each time resetting the stored alembic
    # version in the database and replaying the migration a second time to
    # test its idempotency
    prev_version = START_MIGRATION
    for version in versions_in_chronological_order[start_index:]:
        print(f"Running migration {version}")
        alembic.command.upgrade(alembic_config, version)

        # Revert to prev_version
        with session_manager.scoped_session() as session:
            session.execute(
                sqlalchemy.text(
                    f"UPDATE alembic_version SET version_num = '{prev_version}' WHERE version_num = '{version}'"
                ))

        print(f"Running migration {version}")
        alembic.command.upgrade(alembic_config, version)

        prev_version = version

    drop_database(DB_URL)
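To see what get_version extracts, here is a quick standalone run of the same regex against a history line of the shape shown in the comment above:

import re

line = "b3084b7bc025 -> 5add54e23282, add stems support"
m = re.search("(?P<old_version>.{12}) -> (?P<new_version>.{12}).*", line.strip())
print(m.group("new_version"))  # -> 5add54e23282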