def increment_listen_count_for_day(self, day: datetime, count: int):
    """ Add `count` listens to the per-day listen counter for `day`.

        The counter lives in redis under a key derived from the date
        (YYYYMMDD) and is refreshed to expire after
        LISTEN_COUNT_PER_DAY_EXPIRY_TIME seconds on every increment.
    """
    day_key = cache._prep_key(self.LISTEN_COUNT_PER_DAY_KEY + day.strftime('%Y%m%d'))
    cache._r.incrby(day_key, count)
    cache._r.expire(day_key, self.LISTEN_COUNT_PER_DAY_EXPIRY_TIME)
def delete_listen(self, listened_at: int, user_name: str, recording_msid: str):
    """ Delete a particular listen for user with specified MusicBrainz ID.

        Args:
            listened_at: The timestamp of the listen
            user_name: the username of the user
            recording_msid: the MessyBrainz ID of the recording

        Raises:
            TimescaleListenStoreException if unable to delete the listen
    """
    query = """DELETE FROM listen
                     WHERE listened_at = :listened_at
                       AND user_name = :user_name
                       AND data -> 'track_metadata' -> 'additional_info' ->> 'recording_msid' = :recording_msid
            """
    params = {
        'listened_at': listened_at,
        'user_name': user_name,
        'recording_msid': recording_msid,
    }
    try:
        with timescale.engine.connect() as connection:
            connection.execute(sqlalchemy.text(query), params)
            # keep the cached per-user listen count in step with the delete
            cache._r.decrby(
                cache._prep_key(REDIS_USER_LISTEN_COUNT + user_name))
    except psycopg2.OperationalError as e:
        self.log.error("Cannot delete listen for user: %s" % str(e))
        raise TimescaleListenStoreException
def get_recent_listens(self, max = RECENT_LISTENS_MAX):
    """ Return up to `max` of the most recently submitted listens,
        newest first, deserialized from the redis sorted set.
    """
    recent_key = cache._prep_key(self.RECENT_LISTENS_KEY)
    return [
        Listen.from_json(ujson.loads(raw_listen))
        for raw_listen in cache._r.zrevrange(recent_key, 0, max - 1)
    ]
def update_recent_listens(self, unique):
    """ Store the most recent listens in redis so we can fetch them easily
        for a recent listens page. This is not a critical action, so if it
        fails, it fails. Let's live with it.
    """
    recent_key = cache._prep_key(self.RECENT_LISTENS_KEY)

    # score each serialized listen by its epoch timestamp
    scored = {
        ujson.dumps(listen.to_json()).encode('utf-8'): float(listen.ts_since_epoch)
        for listen in unique
    }

    # Don't take this very seriously -- if it fails, really no big deal. Let is go.
    if scored:
        cache._r.zadd(recent_key, scored, nx=True)

    # Don't prune the sorted list each time, but only when it reaches twice the desired size
    count = cache._r.zcard(recent_key)
    if count > (self.RECENT_LISTENS_MAX * 2):
        cache._r.zpopmin(recent_key, count - self.RECENT_LISTENS_MAX - 1)
def set_listen_count_expiry_for_user(self, user_name):
    """ Set an expire time for the listen count cache item. This is called after
        a bulk import which allows for timescale continuous aggregates to catch up
        to listen counts. Once the key expires, the correct value can be calculated
        from the aggregate.

        Args:
            user_name: the musicbrainz id of user whose listen count needs an expiry time
    """
    count_key = cache._prep_key(REDIS_USER_LISTEN_COUNT + user_name)
    cache._r.expire(count_key, REDIS_POST_IMPORT_LISTEN_COUNT_EXPIRY)
def insert(self, listens):
    """ Insert a batch of listens.

        Returns a list of (listened_at, track_name, user_name) that indicates
        which rows were inserted into the DB. If the row is not listed in the
        return values, it was a duplicate.
    """

    submit = [listen.to_timescale() for listen in listens]
    query = """INSERT INTO listen (listened_at, track_name, user_name, data)
                    VALUES %s
               ON CONFLICT (listened_at, track_name, user_name)
                DO NOTHING
                 RETURNING listened_at, track_name, user_name"""

    inserted_rows = []
    conn = timescale.engine.raw_connection()
    with conn.cursor() as curs:
        try:
            execute_values(curs, query, submit, template=None)
            while True:
                result = curs.fetchone()
                if not result:
                    break
                inserted_rows.append((result[0], result[1], result[2]))
        except UntranslatableCharacter:
            # nothing was committed; bail out (best-effort, as before)
            conn.rollback()
            return

    conn.commit()

    # update the listen counts and timestamps for the users
    user_timestamps = {}
    user_counts = defaultdict(int)
    for ts, _, user_name in inserted_rows:
        # BUG FIX: the original indexed user_timestamps with `listen.user_name`,
        # the stale loop variable from the submission loop above, which credited
        # every min/max timestamp to the last submitted listen's user. Use the
        # user_name unpacked from the inserted row instead.
        if user_name in user_timestamps:
            if ts < user_timestamps[user_name][0]:
                user_timestamps[user_name][0] = ts
            if ts > user_timestamps[user_name][1]:
                user_timestamps[user_name][1] = ts
        else:
            user_timestamps[user_name] = [ts, ts]

        user_counts[user_name] += 1

    for user_name, count in user_counts.items():
        cache._r.incrby(
            cache._prep_key(REDIS_USER_LISTEN_COUNT + user_name), count)

    for user_name, (min_ts, max_ts) in user_timestamps.items():
        self.update_timestamps_for_user(user_name, min_ts, max_ts)

    return inserted_rows
def test_delete_listens(self):
    """ Test delete listens for a user.

        End-to-end flow: submit listens, set a latest_import timestamp,
        delete all listens via the profile view, then verify both the
        listen count and the latest_import timestamp are reset.
    """
    # test get requests to delete-listens view first
    self.temporary_login(self.user['login_id'])
    resp = self.client.get(url_for('profile.delete_listens'))
    self.assert200(resp)

    # send three listens for the user
    resp = self.send_listens()
    self.assert200(resp)

    # set the latest_import ts to a non-default value, so that we can check it was
    # reset later
    val = int(time.time())
    resp = self.client.post(
        url_for('api_v1.latest_import'),
        data=json.dumps({'ts': val}),
        headers={
            'Authorization': 'Token {}'.format(self.user['auth_token'])
        },
        content_type='application/json',
    )
    self.assert200(resp)
    resp = self.client.get(
        url_for('api_v1.latest_import', user_name=self.user['musicbrainz_id']))
    self.assert200(resp)
    self.assertEqual(resp.json['latest_import'], val)
    # the cached listen count key should have an expiry set (ttl != 0)
    self.assertNotEqual(
        self.redis.ttl(
            cache._prep_key(REDIS_USER_LISTEN_COUNT + str(self.user['id']))),
        0)

    # check that listens have been successfully submitted
    resp = self.client.get(
        url_for('api_v1.get_listen_count', user_name=self.user['musicbrainz_id']))
    self.assert200(resp)
    self.assertEqual(json.loads(resp.data)['payload']['count'], 3)

    # now delete all the listens we just sent
    # we do a get request first to put the CSRF token in the flask global context
    # so that we can access it for using in the post request in the next step
    self.client.get(url_for('profile.delete_listens'))
    resp = self.client.post(url_for('profile.delete_listens'),
                            data={'csrf_token': g.csrf_token})
    self.assertRedirects(
        resp, url_for('user.profile', user_name=self.user['musicbrainz_id']))

    # listen counts are cached for 5 min, so delete key otherwise cached will be returned
    cache.delete(REDIS_USER_LISTEN_COUNT + str(self.user['id']))

    # check that listens have been successfully deleted
    resp = self.client.get(
        url_for('api_v1.get_listen_count', user_name=self.user['musicbrainz_id']))
    self.assert200(resp)
    self.assertEqual(json.loads(resp.data)['payload']['count'], 0)

    # check that the latest_import timestamp has been reset too
    resp = self.client.get(
        url_for('api_v1.latest_import', user_name=self.user['musicbrainz_id']))
    self.assert200(resp)
    self.assertEqual(resp.json['latest_import'], 0)
def recalculate_all_user_data():
    """ Recalculate cached listen counts and min/max listen timestamps for
        every user by scanning the whole listen table.

        Listens created after the scan's cutoff (the latest `created` value at
        the start) are accounted for by the normal insert path, which increments
        the same redis keys; that is why counts are reset to 0 first and then
        incremented rather than set.
    """
    timescale.init_db_connection(config.SQLALCHEMY_TIMESCALE_URI)
    db.init_db_connection(config.SQLALCHEMY_DATABASE_URI)
    init_cache(host=config.REDIS_HOST, port=config.REDIS_PORT,
               namespace=config.REDIS_NAMESPACE)

    # Find the created timestamp of the last listen
    query = "SELECT max(created) FROM listen WHERE created > :date"
    try:
        with timescale.engine.connect() as connection:
            result = connection.execute(sqlalchemy.text(
                query), date=datetime.now() - timedelta(weeks=4))
            row = result.fetchone()
            last_created_ts = row[0]
    except psycopg2.OperationalError as e:
        # BUG FIX: the original format string had no %s placeholder, so the
        # % operator raised TypeError instead of logging the error.
        logger.error("Cannot query ts to fetch latest listen. %s" %
                     str(e), exc_info=True)
        raise

    logger.info("Last created timestamp: " + str(last_created_ts))

    # Select a list of users
    user_list = []
    query = 'SELECT musicbrainz_id FROM "user"'
    try:
        with db.engine.connect() as connection:
            result = connection.execute(sqlalchemy.text(query))
            for row in result:
                user_list.append(row[0])
    except psycopg2.OperationalError as e:
        # BUG FIX: added the missing %s placeholder (see above).
        logger.error("Cannot query db to fetch user list. %s" %
                     str(e), exc_info=True)
        raise

    logger.info("Fetched %d users. Setting empty cache entries." %
                len(user_list))

    # Reset the timestamps and listen counts to 0 for all users
    # (the original had the listen-count reset line duplicated; once is enough)
    for user_name in user_list:
        cache.set(REDIS_USER_LISTEN_COUNT + user_name, 0, time=0, encode=False)
        cache.set(REDIS_USER_TIMESTAMPS + user_name, "0,0", time=0)

    # Tabulate all of the listen counts/timestamps for all users
    logger.info("Scan the whole listen table...")
    listen_counts = defaultdict(int)
    user_timestamps = {}
    query = "SELECT listened_at, user_name FROM listen where created <= :ts"
    try:
        with timescale.engine.connect() as connection:
            result = connection.execute(
                sqlalchemy.text(query), ts=last_created_ts)
            for row in result:
                ts = row[0]
                user_name = row[1]
                if user_name not in user_timestamps:
                    user_timestamps[user_name] = [ts, ts]
                else:
                    if ts > user_timestamps[user_name][1]:
                        user_timestamps[user_name][1] = ts
                    if ts < user_timestamps[user_name][0]:
                        user_timestamps[user_name][0] = ts

                listen_counts[user_name] += 1
    except psycopg2.OperationalError as e:
        # BUG FIX: missing %s placeholder; also the message was copy-pasted
        # from the user-list query above and described the wrong operation.
        logger.error("Cannot query timescale to scan listen table. %s" %
                     str(e), exc_info=True)
        raise

    logger.info("Setting updated cache entries.")
    # Set the timestamps and listen counts for all users
    for user_name in user_list:
        try:
            cache._r.incrby(cache._prep_key(
                REDIS_USER_LISTEN_COUNT + user_name), listen_counts[user_name])
        except KeyError:
            pass

        try:
            # merge with any timestamps written by inserts that raced this scan
            tss = cache.get(REDIS_USER_TIMESTAMPS + user_name)
            (min_ts, max_ts) = tss.split(",")
            min_ts = int(min_ts)
            max_ts = int(max_ts)
            if min_ts and min_ts < user_timestamps[user_name][0]:
                user_timestamps[user_name][0] = min_ts
            if max_ts and max_ts > user_timestamps[user_name][1]:
                user_timestamps[user_name][1] = max_ts
            cache.set(REDIS_USER_TIMESTAMPS + user_name, "%d,%d" %
                      (user_timestamps[user_name][0], user_timestamps[user_name][1]), time=0)
        except KeyError:
            # user has no listens in the scan window; leave the "0,0" entry
            pass