def cache_trending(db, redis, strategy):
    with db.scoped_session() as session:
        for time_range in TIME_RANGES:
            key = make_trending_cache_key(time_range, strategy.version)
            res = make_get_unpopulated_playlists(session, time_range, strategy)()
            set_json_cached_key(redis, key, res)
def test_json_cache_single_key(redis_mock):
    """Test that values may be set and fetched from the redis cache"""
    set_json_cached_key(redis_mock, "key", {"name": "joe", "favorite_band": "Pink"})
    assert get_json_cached_key(redis_mock, "key") == {
        "name": "joe",
        "favorite_band": "Pink",
    }
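# The set_json_cached_key / get_json_cached_key helpers exercised by these tests are
# not defined in this section. The following is a minimal sketch of what they might
# look like, assuming values are JSON-serialized with a datetime-aware encoder
# (datetimes stored as ISO-8601 strings, which is why the date test below re-parses
# the cached value). Names, signatures, and TTL handling here are assumptions, not
# the canonical implementation.
import json
from datetime import datetime


def _json_default(value):
    # Assumed encoder hook: render datetimes as ISO-8601 strings
    if isinstance(value, datetime):
        return value.isoformat()
    raise TypeError(f"Unserializable value {value!r}")


def set_json_cached_key(redis, key, obj, ttl_sec=None):
    # Serialize to JSON and store, optionally with a TTL in seconds
    serialized = json.dumps(obj, default=_json_default)
    if ttl_sec is not None:
        redis.set(key, serialized, ex=ttl_sec)
    else:
        redis.set(key, serialized)


def get_json_cached_key(redis, key):
    # Return the deserialized value, or None if the key is missing or not valid JSON
    cached = redis.get(key)
    if cached is None:
        return None
    try:
        return json.loads(cached)
    except json.JSONDecodeError:
        return None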
def fetch_cnode_info(sp_id, sp_factory_instance, redis):
    sp_id_key = get_sp_id_key(sp_id)
    sp_info_cached = get_json_cached_key(redis, sp_id_key)
    if sp_info_cached:
        logger.info(
            f"eth_contract_helpers.py | Found cached value for spID={sp_id} - {sp_info_cached}"
        )
        return sp_info_cached

    cn_endpoint_info = sp_factory_instance.functions.getServiceEndpointInfo(
        content_node_service_type, sp_id
    ).call()
    set_json_cached_key(redis, sp_id_key, cn_endpoint_info, cnode_info_redis_ttl)
    logger.info(
        f"eth_contract_helpers.py | Configured redis {sp_id_key} - {cn_endpoint_info} - TTL {cnode_info_redis_ttl}"
    )
    return cn_endpoint_info
def set_indexing_error(
    redis_instance, blocknumber, blockhash, txhash, message, has_consensus=False
):
    indexing_error = get_json_cached_key(redis_instance, INDEXING_ERROR_KEY)
    if indexing_error is None or (
        indexing_error["blocknumber"] != blocknumber
        or indexing_error["blockhash"] != blockhash
        or indexing_error["txhash"] != txhash
    ):
        indexing_error = {
            "count": 1,
            "blocknumber": blocknumber,
            "blockhash": blockhash,
            "txhash": txhash,
            "message": message,
            "has_consensus": has_consensus,
        }
        set_json_cached_key(redis_instance, INDEXING_ERROR_KEY, indexing_error)
    else:
        indexing_error["count"] += 1
        indexing_error["has_consensus"] = has_consensus
        set_json_cached_key(redis_instance, INDEXING_ERROR_KEY, indexing_error)
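# A minimal usage sketch of set_indexing_error, assuming an in-memory fakeredis client
# and the INDEXING_ERROR_KEY constant used above: repeated reports of the same
# block/tx increment the stored count, while a different block/tx resets it to 1.
# This is illustrative only; the fakeredis dependency is an assumption.
import fakeredis

redis_instance = fakeredis.FakeStrictRedis()
set_indexing_error(redis_instance, 100, "0xabc", "0xdef", "parse failure")
set_indexing_error(redis_instance, 100, "0xabc", "0xdef", "parse failure")
error = get_json_cached_key(redis_instance, INDEXING_ERROR_KEY)
assert error["count"] == 2  # same block/tx reported twice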
def set_tracks_in_cache(tracks):
    redis = redis_connection.get_redis()
    for track in tracks:
        key = get_track_id_cache_key(track["track_id"])
        set_json_cached_key(redis, key, track, ttl_sec)
def index_trending(self, db: SessionManager, redis: Redis, timestamp):
    logger.info("index_trending.py | starting indexing")
    update_start = time.time()
    metric = PrometheusMetric(
        "index_trending_runtime_seconds",
        "Runtimes for src.task.index_trending:index_trending()",
    )
    with db.scoped_session() as session:
        genres = get_genres(session)

        # Make sure to cache empty genre
        genres.append(None)  # type: ignore

        trending_track_versions = trending_strategy_factory.get_versions_for_type(
            TrendingType.TRACKS
        ).keys()

        update_view(session, AGGREGATE_INTERVAL_PLAYS)
        update_view(session, TRENDING_PARAMS)
        for version in trending_track_versions:
            strategy = trending_strategy_factory.get_strategy(
                TrendingType.TRACKS, version
            )
            if strategy.use_mat_view:
                strategy.update_track_score_query(session)

        for version in trending_track_versions:
            strategy = trending_strategy_factory.get_strategy(
                TrendingType.TRACKS, version
            )
            for genre in genres:
                for time_range in time_ranges:
                    cache_start_time = time.time()
                    if strategy.use_mat_view:
                        res = generate_unpopulated_trending_from_mat_views(
                            session, genre, time_range, strategy
                        )
                    else:
                        res = generate_unpopulated_trending(
                            session, genre, time_range, strategy
                        )
                    key = make_trending_cache_key(time_range, genre, version)
                    set_json_cached_key(redis, key, res)
                    cache_end_time = time.time()
                    total_time = cache_end_time - cache_start_time
                    logger.info(
                        f"index_trending.py | Cached trending ({version.name} version) "
                        f"for {genre}-{time_range} in {total_time} seconds"
                    )

        # Cache underground trending
        underground_trending_versions = trending_strategy_factory.get_versions_for_type(
            TrendingType.UNDERGROUND_TRACKS
        ).keys()
        for version in underground_trending_versions:
            strategy = trending_strategy_factory.get_strategy(
                TrendingType.UNDERGROUND_TRACKS, version
            )
            cache_start_time = time.time()
            res = make_get_unpopulated_tracks(session, redis, strategy)()
            key = make_underground_trending_cache_key(version)
            set_json_cached_key(redis, key, res)
            cache_end_time = time.time()
            total_time = cache_end_time - cache_start_time
            logger.info(
                f"index_trending.py | Cached underground trending ({version.name} version) "
                f"in {total_time} seconds"
            )

    update_end = time.time()
    update_total = update_end - update_start
    metric.save_time()
    logger.info(
        f"index_trending.py | Finished indexing trending in {update_total} seconds",
        extra={"job": "index_trending", "total_time": update_total},
    )
    # Update cache key to track the last time trending finished indexing
    redis.set(trending_tracks_last_completion_redis_key, int(update_end))
    set_last_trending_datetime(redis, timestamp)
def cleanup():
    set_json_cached_key(redis, INDEXING_ERROR_KEY, None)  # clear indexing error
    yield
    remove_test_file(track_metadata_json_file)
def set_users_in_cache(users):
    redis = redis_connection.get_redis()
    for user in users:
        key = get_user_id_cache_key(user["user_id"])
        set_json_cached_key(redis, key, user, ttl_sec)
def set_playlists_in_cache(playlists):
    redis = redis_connection.get_redis()
    for playlist in playlists:
        key = get_playlist_id_cache_key(playlist["playlist_id"])
        set_json_cached_key(redis, key, playlist, ttl_sec)
def process_solana_plays(solana_client_manager: SolanaClientManager, redis: Redis):
    try:
        base58.b58decode(TRACK_LISTEN_PROGRAM)
    except ValueError:
        logger.info(
            f"index_solana_plays.py | "
            f"Invalid TrackListenCount program ({TRACK_LISTEN_PROGRAM}) configured, exiting."
        )
        return

    db = index_solana_plays.db

    # Highest currently processed slot in the DB
    latest_processed_slot = get_latest_slot(db)
    logger.info(f"index_solana_plays.py | latest used slot: {latest_processed_slot}")

    # Utilize the cached tx to offset
    cached_offset_tx = fetch_traversed_tx_from_cache(redis, latest_processed_slot)

    # The 'before' value from where we start querying transactions
    last_tx_signature = cached_offset_tx

    # Loop exit condition
    intersection_found = False

    # List of signatures that will be populated as we traverse recent operations
    transaction_signatures = []

    # Current batch of transactions
    transaction_signature_batch = []

    # Current batch page count
    page_count = 0

    # The last transaction processed
    last_tx = None

    # Get the latest slot available globally before fetching txs to keep track of indexing progress
    latest_global_slot = None
    try:
        latest_global_slot = solana_client_manager.get_slot()
    except Exception:
        logger.error("index_solana_plays.py | Failed to get block height")

    # Traverse recent records until an intersection is found with the existing Plays table
    while not intersection_found:
        logger.info(
            f"index_solana_plays.py | Requesting transactions before {last_tx_signature}"
        )
        transactions_history = solana_client_manager.get_signatures_for_address(
            TRACK_LISTEN_PROGRAM,
            before=last_tx_signature,
            limit=FETCH_TX_SIGNATURES_BATCH_SIZE,
        )
        logger.info(
            f"index_solana_plays.py | Retrieved transactions before {last_tx_signature}"
        )
        transactions_array = transactions_history["result"]
        if not transactions_array:
            # This is considered an 'intersection' since there are no further transactions to process,
            # but it really represents the end of known history for this ProgramId
            intersection_found = True
            logger.info(
                f"index_solana_plays.py | No transactions found before {last_tx_signature}"
            )
        else:
            with db.scoped_session() as read_session:
                for tx in transactions_array:
                    tx_sig = tx["signature"]
                    slot = tx["slot"]
                    if tx["slot"] > latest_processed_slot:
                        transaction_signature_batch.append(tx_sig)
                    elif tx["slot"] <= latest_processed_slot:
                        # Check the tx signature for any txs in the latest batch,
                        # and if not present in DB, add to processing
                        logger.info(
                            f"index_solana_plays.py | Latest slot re-traversal "
                            f"slot={slot}, sig={tx_sig}, "
                            f"latest_processed_slot(db)={latest_processed_slot}"
                        )
                        exists = get_tx_in_db(read_session, tx_sig)
                        if exists:
                            # Exit loop and set terminal condition since this tx has been found in DB.
                            # Transactions are returned with most recently committed first, so we can assume
                            # subsequent transactions in this batch have already been processed.
                            intersection_found = True
                            break
                        # Otherwise, ensure this transaction is still processed
                        transaction_signature_batch.append(tx_sig)

                # Restart processing at the end of this transaction signature batch
                last_tx = transactions_array[-1]
                last_tx_signature = last_tx["signature"]

                # Append to recently seen cache
                cache_traversed_tx(redis, last_tx)

                # Append batch of processed signatures
                if transaction_signature_batch:
                    transaction_signatures.append(transaction_signature_batch)

                # Reset batch state
                transaction_signature_batch = []

        logger.info(
            f"index_solana_plays.py | intersection_found={intersection_found}, "
            f"last_tx_signature={last_tx_signature}, "
            f"page_count={page_count}"
        )
        page_count = page_count + 1

    transaction_signatures.reverse()

    for tx_sig_batch in transaction_signatures:
        for tx_sig_batch_records in split_list(
            tx_sig_batch, TX_SIGNATURES_PROCESSING_SIZE
        ):
            parse_sol_tx_batch(db, solana_client_manager, redis, tx_sig_batch_records)

    try:
        if transaction_signatures and transaction_signatures[-1]:
            last_tx_sig = transaction_signatures[-1][-1]
            tx_info = solana_client_manager.get_sol_tx_info(last_tx_sig)
            tx_result: TransactionInfoResult = tx_info["result"]
            set_json_cached_key(
                redis,
                CURRENT_PLAY_INDEXING,
                {"slot": tx_result["slot"], "timestamp": tx_result["blockTime"]},
            )
    except Exception as e:
        logger.error(
            "index_solana_plays.py | Unable to set redis current play indexing",
            exc_info=True,
        )
        raise e

    if last_tx:
        redis.set(latest_sol_plays_slot_key, last_tx["slot"])
    elif latest_global_slot is not None:
        redis.set(latest_sol_plays_slot_key, latest_global_slot)
def test_json_cache_date_value(redis_mock):
    """Test that datetime values round-trip through the cache as parseable strings"""
    date = datetime(2016, 2, 18, 9, 50, 20)
    set_json_cached_key(redis_mock, "key", {"date": date})
    result = get_json_cached_key(redis_mock, "key")
    assert parser.parse(result["date"]) == date
def test_json_cache_multiple_keys(redis_mock):
    """Test fetching many keys at once, including missing and non-JSON values"""
    set_json_cached_key(redis_mock, "key1", {"name": "captain america"})
    set_json_cached_key(redis_mock, "key2", {"name": "thor"})
    set_json_cached_key(redis_mock, "key3", {"name": "iron man"})
    set_json_cached_key(redis_mock, "key4", {"name": "hulk"})
    # skip key5, which should report None
    set_json_cached_key(redis_mock, "key6", {"name": "hawkeye"})
    redis_mock.set("key7", "cannot_serialize")
    set_json_cached_key(redis_mock, "key8", {"name": "spiderman"})

    results = get_all_json_cached_key(
        redis_mock, ["key1", "key2", "key3", "key4", "key5", "key6", "key7", "key8"]
    )
    assert results == [
        {"name": "captain america"},
        {"name": "thor"},
        {"name": "iron man"},
        {"name": "hulk"},
        None,
        {"name": "hawkeye"},
        None,
        {"name": "spiderman"},
    ]
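# get_all_json_cached_key is also not defined in this section. Below is a minimal
# sketch consistent with the test above, assuming it fetches all keys in a single
# MGET and returns None for keys that are missing or hold values that fail to parse
# as JSON (such as "cannot_serialize"). This is an assumed shape, not the canonical
# implementation.
import json


def get_all_json_cached_key(redis, keys):
    # Fetch all values in one round trip; preserve the order of the requested keys
    cached_values = redis.mget(keys)
    results = []
    for cached in cached_values:
        if cached is None:
            results.append(None)
            continue
        try:
            results.append(json.loads(cached))
        except json.JSONDecodeError:
            # Value exists but is not valid JSON
            results.append(None)
    return results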
def test_listen_count_milestone_processing(app):
    redis_conn = redis.Redis.from_url(url=REDIS_URL)
    set_json_cached_key(
        redis_conn, CURRENT_PLAY_INDEXING, {"slot": 12, "timestamp": 1634836054}
    )
    with app.app_context():
        db = get_db()

    test_entities = {
        "plays": [{"item_id": 1} for _ in range(8)]
        + [{"item_id": 2} for _ in range(10)]  # milestone 10
        + [{"item_id": 3} for _ in range(11)]  # milestone 10
        + [{"item_id": 4} for _ in range(12)]  # milestone 10
        + [{"item_id": 5} for _ in range(25)]  # milestone 25
        + [{"item_id": 6} for _ in range(27)]  # milestone 25
        + [{"item_id": 7} for _ in range(40)]  # milestone 25
        + [{"item_id": 8} for _ in range(80)]  # milestone 50
        + [{"item_id": 9} for _ in range(111)]  # milestone 100
        + [{"item_id": 10} for _ in range(25)]  # milestone 25
    }
    populate_mock_db(db, test_entities)

    with db.scoped_session() as session:
        _update_aggregate_plays(session)

    redis_conn.sadd(TRACK_LISTEN_IDS, *track_ids)
    index_listen_count_milestones(db, redis_conn)

    with db.scoped_session() as session:
        milestones = session.query(Milestone).all()
        assert len(milestones) == 9

        sorted_milestones = sorted(milestones, key=lambda m: m.id)
        sorted_milestones = [
            (milestone.id, milestone.threshold, milestone.slot, milestone.timestamp)
            for milestone in sorted_milestones
        ]
        assert sorted_milestones == [
            (2, 10, 12, datetime.fromtimestamp(1634836054)),
            (3, 10, 12, datetime.fromtimestamp(1634836054)),
            (4, 10, 12, datetime.fromtimestamp(1634836054)),
            (5, 25, 12, datetime.fromtimestamp(1634836054)),
            (6, 25, 12, datetime.fromtimestamp(1634836054)),
            (7, 25, 12, datetime.fromtimestamp(1634836054)),
            (8, 50, 12, datetime.fromtimestamp(1634836054)),
            (9, 100, 12, datetime.fromtimestamp(1634836054)),
            (10, 25, 12, datetime.fromtimestamp(1634836054)),
        ]

    # Add the same tracks and process to check that no new milestones are created
    redis_conn.sadd(TRACK_LISTEN_IDS, *track_ids)
    index_listen_count_milestones(db, redis_conn)
    with db.scoped_session() as session:
        milestones = session.query(Milestone).all()
        assert len(milestones) == 9

    test_entities = {
        "plays": [{"item_id": 1, "id": 1000 + i} for i in range(3)]  # 3 + 8 = 11 new
        + [{"item_id": 2, "id": 1200 + i} for i in range(100)]  # 10 + 100 = 110 new
        + [{"item_id": 3, "id": 1400 + i} for i in range(10)]  # 10 + 11 = 21 not new
        + [{"item_id": 4, "id": 1600 + i} for i in range(1000)]  # 1000 + 12 = 1012 new
        + [{"item_id": 8, "id": 3000 + i} for i in range(19)]  # 19 + 80 = 99 not new
        + [{"item_id": 9, "id": 9000 + i} for i in range(5000)]  # 5000 + 111 = 5111 new
    }
    populate_mock_db(db, test_entities)
    with db.scoped_session() as session:
        _update_aggregate_plays(session)

    # Re-add the tracks and process to check that only newly crossed milestones are created
    redis_conn.sadd(TRACK_LISTEN_IDS, *track_ids)
    set_json_cached_key(
        redis_conn, CURRENT_PLAY_INDEXING, {"slot": 14, "timestamp": 1634836056}
    )
    index_listen_count_milestones(db, redis_conn)

    with db.scoped_session() as session:
        milestones = session.query(Milestone).filter(Milestone.slot == 14).all()
        assert len(milestones) == 4

        sorted_milestones = sorted(milestones, key=lambda m: m.id)
        sorted_milestones = [
            (milestone.id, milestone.threshold) for milestone in sorted_milestones
        ]
        assert sorted_milestones == [(1, 10), (2, 100), (4, 1000), (9, 5000)]

    # Add a track that's not been indexed yet
    redis_conn.sadd(TRACK_LISTEN_IDS, 20)
    set_json_cached_key(
        redis_conn, CURRENT_PLAY_INDEXING, {"slot": 14, "timestamp": 1634836056}
    )
    index_listen_count_milestones(db, redis_conn)