コード例 #1
0
 def increment_listen_count_for_day(self, day: datetime, count: int):
     """Add `count` to the running total of listens submitted on `day`.

     The per-day counter lives in redis under a date-stamped key and has its
     expiry refreshed on every update.
     """
     day_key = cache._prep_key(self.LISTEN_COUNT_PER_DAY_KEY + day.strftime('%Y%m%d'))
     cache._r.incrby(day_key, count)
     cache._r.expire(day_key, self.LISTEN_COUNT_PER_DAY_EXPIRY_TIME)
コード例 #2
0
    def delete_listen(self, listened_at: int, user_name: str,
                      recording_msid: str):
        """ Delete a particular listen for user with specified MusicBrainz ID.

        Args:
            listened_at: The timestamp of the listen
            user_name: the username of the user
            recording_msid: the MessyBrainz ID of the recording

        Raises:
            TimescaleListenStoreException: if unable to delete the listen
        """

        args = {
            'listened_at': listened_at,
            'user_name': user_name,
            'recording_msid': recording_msid
        }
        query = """DELETE FROM listen
                    WHERE listened_at = :listened_at
                      AND user_name = :user_name
                      AND data -> 'track_metadata' -> 'additional_info' ->> 'recording_msid' = :recording_msid """

        try:
            with timescale.engine.connect() as connection:
                connection.execute(sqlalchemy.text(query), args)

            # Keep the cached per-user listen count in sync with the DB.
            # NOTE(review): this decrements even when no row matched the
            # DELETE, so the cached count can drift low — verify callers.
            cache._r.decrby(
                cache._prep_key(REDIS_USER_LISTEN_COUNT + user_name))
        except psycopg2.OperationalError as e:
            self.log.error("Cannot delete listen for user: %s" % str(e))
            # Chain the original error so the root cause is preserved in the
            # traceback (the original raise discarded it).
            raise TimescaleListenStoreException from e
コード例 #3
0
    def get_recent_listens(self, max = RECENT_LISTENS_MAX):
        """Fetch up to `max` of the most recently submitted listens.

        Reads newest-first from the redis sorted set of recent listens and
        deserializes each entry back into a Listen object.
        """
        raw_entries = cache._r.zrevrange(cache._prep_key(self.RECENT_LISTENS_KEY), 0, max - 1)
        return [Listen.from_json(ujson.loads(entry)) for entry in raw_entries]
コード例 #4
0
    def update_recent_listens(self, unique):
        """ Store the most recent listens in redis so we can fetch them easily
            for a recent listens page. This is not a critical action, so if it
            fails, it fails. Let's live with it.
        """

        # Map serialized listen -> score (seconds since epoch) for ZADD.
        recent = {
            ujson.dumps(listen.to_json()).encode('utf-8'): float(listen.ts_since_epoch)
            for listen in unique
        }

        # Don't take this very seriously -- if it fails, really no big deal. Let is go.
        if recent:
            # nx=True: only add new members, never update existing scores.
            cache._r.zadd(cache._prep_key(self.RECENT_LISTENS_KEY), recent, nx=True)

            # Don't prune the sorted set each time, but only when it reaches twice the desired size
            count = cache._r.zcard(cache._prep_key(self.RECENT_LISTENS_KEY))
            if count > (self.RECENT_LISTENS_MAX * 2):
                # Pop the oldest (lowest-scored) entries so that exactly
                # RECENT_LISTENS_MAX remain. The original popped
                # `count - MAX - 1`, an off-by-one leaving MAX + 1 entries.
                cache._r.zpopmin(cache._prep_key(self.RECENT_LISTENS_KEY),
                                 count - self.RECENT_LISTENS_MAX)
コード例 #5
0
    def set_listen_count_expiry_for_user(self, user_name):
        """Attach an expiry time to a user's cached listen-count key.

        Called after a bulk import, which gives the timescale continuous
        aggregates time to catch up to the listen counts. Once the key
        expires, the correct value can be calculated from the aggregate.

        Args:
            user_name: the musicbrainz id of user whose listen count needs an
                expiry time
        """
        count_key = cache._prep_key(REDIS_USER_LISTEN_COUNT + user_name)
        cache._r.expire(count_key, REDIS_POST_IMPORT_LISTEN_COUNT_EXPIRY)
コード例 #6
0
    def insert(self, listens):
        """ Insert a batch of listens.

        Returns a list of (listened_at, track_name, user_name) tuples for the
        rows that were actually inserted into the DB. If a row is not listed
        in the return value, it was a duplicate (ON CONFLICT DO NOTHING).
        """

        submit = [listen.to_timescale() for listen in listens]

        query = """INSERT INTO listen (listened_at, track_name, user_name, data)
                        VALUES %s
                   ON CONFLICT (listened_at, track_name, user_name)
                    DO NOTHING
                     RETURNING listened_at, track_name, user_name"""

        inserted_rows = []
        conn = timescale.engine.raw_connection()
        with conn.cursor() as curs:
            try:
                execute_values(curs, query, submit, template=None)
                # Drain the RETURNING rows to find out which listens were new.
                while True:
                    result = curs.fetchone()
                    if not result:
                        break
                    inserted_rows.append((result[0], result[1], result[2]))
            except UntranslatableCharacter:
                conn.rollback()
                return

        conn.commit()

        # Update the cached listen counts and min/max timestamps per user.
        # BUG FIX: the original read `listen.user_name` here — the stale loop
        # variable left over from building `submit` — so all timestamps were
        # attributed to the *last* listen's user. Use each row's user_name.
        user_timestamps = {}
        user_counts = defaultdict(int)
        for ts, _, user_name in inserted_rows:
            if user_name in user_timestamps:
                if ts < user_timestamps[user_name][0]:
                    user_timestamps[user_name][0] = ts
                if ts > user_timestamps[user_name][1]:
                    user_timestamps[user_name][1] = ts
            else:
                user_timestamps[user_name] = [ts, ts]

            user_counts[user_name] += 1

        for user_name, count in user_counts.items():
            cache._r.incrby(cache._prep_key(
                REDIS_USER_LISTEN_COUNT + user_name), count)

        for user_name, (min_ts, max_ts) in user_timestamps.items():
            self.update_timestamps_for_user(user_name, min_ts, max_ts)

        return inserted_rows
コード例 #7
0
    def test_delete_listens(self):
        """
        End-to-end test of the delete-listens flow: submit listens via the
        API, delete them through the profile view, then verify the listen
        count and the latest_import timestamp are both reset to zero.
        """
        # test get requests to delete-listens view first
        self.temporary_login(self.user['login_id'])
        resp = self.client.get(url_for('profile.delete_listens'))
        self.assert200(resp)

        # send three listens for the user
        resp = self.send_listens()
        self.assert200(resp)

        # set the latest_import ts to a non-default value, so that we can check it was
        # reset later
        val = int(time.time())
        resp = self.client.post(
            url_for('api_v1.latest_import'),
            data=json.dumps({'ts': val}),
            headers={
                'Authorization': 'Token {}'.format(self.user['auth_token'])
            },
            content_type='application/json',
        )
        self.assert200(resp)
        resp = self.client.get(
            url_for('api_v1.latest_import',
                    user_name=self.user['musicbrainz_id']))
        self.assert200(resp)
        self.assertEqual(resp.json['latest_import'], val)

        # the cached listen-count key should have a TTL set (ttl != 0 means
        # the key either exists with an expiry or does not exist at all)
        self.assertNotEqual(
            self.redis.ttl(
                cache._prep_key(REDIS_USER_LISTEN_COUNT +
                                str(self.user['id']))), 0)

        # check that listens have been successfully submitted
        resp = self.client.get(
            url_for('api_v1.get_listen_count',
                    user_name=self.user['musicbrainz_id']))
        self.assert200(resp)
        self.assertEqual(json.loads(resp.data)['payload']['count'], 3)

        # now delete all the listens we just sent
        # we do a get request first to put the CSRF token in the flask global context
        # so that we can access it for using in the post request in the next step
        self.client.get(url_for('profile.delete_listens'))
        resp = self.client.post(url_for('profile.delete_listens'),
                                data={'csrf_token': g.csrf_token})
        self.assertRedirects(
            resp, url_for('user.profile',
                          user_name=self.user['musicbrainz_id']))

        # listen counts are cached for 5 min, so delete key otherwise cached will be returned
        cache.delete(REDIS_USER_LISTEN_COUNT + str(self.user['id']))

        # check that listens have been successfully deleted
        resp = self.client.get(
            url_for('api_v1.get_listen_count',
                    user_name=self.user['musicbrainz_id']))
        self.assert200(resp)
        self.assertEqual(json.loads(resp.data)['payload']['count'], 0)

        # check that the latest_import timestamp has been reset too
        resp = self.client.get(
            url_for('api_v1.latest_import',
                    user_name=self.user['musicbrainz_id']))
        self.assert200(resp)
        self.assertEqual(resp.json['latest_import'], 0)
コード例 #8
0
def recalculate_all_user_data():
    """Recompute every user's cached listen count and min/max listen
    timestamps from scratch by scanning the whole listen table.

    Counters are first reset to 0 and later *incremented* with the scanned
    totals; listens created after the scan cutoff are accounted for by the
    normal incremental code path, so concurrent inserts are not lost.
    """

    timescale.init_db_connection(config.SQLALCHEMY_TIMESCALE_URI)
    db.init_db_connection(config.SQLALCHEMY_DATABASE_URI)
    init_cache(host=config.REDIS_HOST, port=config.REDIS_PORT,
               namespace=config.REDIS_NAMESPACE)

    # Find the created timestamp of the last listen
    query = "SELECT max(created) FROM listen WHERE created > :date"
    try:
        with timescale.engine.connect() as connection:
            result = connection.execute(sqlalchemy.text(
                query), date=datetime.now() - timedelta(weeks=4))
            row = result.fetchone()
            last_created_ts = row[0]
    except psycopg2.OperationalError as e:
        # BUG FIX: the original applied % to a format string with no
        # placeholder ("..." % str(e)), raising TypeError instead of logging.
        # Use logging's lazy %-args instead.
        logger.error("Cannot query ts to fetch latest listen: %s",
                     str(e), exc_info=True)
        raise

    logger.info("Last created timestamp: " + str(last_created_ts))

    # Select a list of users
    user_list = []
    query = 'SELECT musicbrainz_id FROM "user"'
    try:
        with db.engine.connect() as connection:
            result = connection.execute(sqlalchemy.text(query))
            for row in result:
                user_list.append(row[0])
    except psycopg2.OperationalError as e:
        # BUG FIX: same broken %-format as above.
        logger.error("Cannot query db to fetch user list: %s",
                     str(e), exc_info=True)
        raise

    logger.info("Fetched %d users. Setting empty cache entries." %
                len(user_list))

    # Reset the timestamps and listen counts to 0 for all users.
    # (The original set the listen-count key twice; once is enough.)
    for user_name in user_list:
        cache.set(REDIS_USER_LISTEN_COUNT + user_name, 0, time=0, encode=False)
        cache.set(REDIS_USER_TIMESTAMPS + user_name, "0,0", time=0)

    # Tabulate all of the listen counts/timestamps for all users
    logger.info("Scan the whole listen table...")
    listen_counts = defaultdict(int)
    user_timestamps = {}
    query = "SELECT listened_at, user_name FROM listen where created <= :ts"
    try:
        with timescale.engine.connect() as connection:
            result = connection.execute(
                sqlalchemy.text(query), ts=last_created_ts)
            for row in result:
                ts = row[0]
                user_name = row[1]
                if user_name not in user_timestamps:
                    user_timestamps[user_name] = [ts, ts]
                else:
                    if ts > user_timestamps[user_name][1]:
                        user_timestamps[user_name][1] = ts
                    if ts < user_timestamps[user_name][0]:
                        user_timestamps[user_name][0] = ts

                listen_counts[user_name] += 1

    except psycopg2.OperationalError as e:
        # BUG FIX: broken %-format, plus the original logged the copy-pasted
        # "fetch user list" message for the listen-table scan.
        logger.error("Cannot query db to scan listen table: %s",
                     str(e), exc_info=True)
        raise

    logger.info("Setting updated cache entries.")
    # Set the timestamps and listen counts for all users
    for user_name in user_list:
        # listen_counts is a defaultdict(int), so a user with no listens just
        # adds 0 — the original's try/except KeyError here was dead code.
        cache._r.incrby(cache._prep_key(
            REDIS_USER_LISTEN_COUNT + user_name), listen_counts[user_name])

        try:
            # Merge with any timestamps written by concurrent inserts since
            # the reset above; keep the widest [min, max] interval.
            tss = cache.get(REDIS_USER_TIMESTAMPS + user_name)
            (min_ts, max_ts) = tss.split(",")
            min_ts = int(min_ts)
            max_ts = int(max_ts)
            if min_ts and min_ts < user_timestamps[user_name][0]:
                user_timestamps[user_name][0] = min_ts
            if max_ts and max_ts > user_timestamps[user_name][1]:
                user_timestamps[user_name][1] = max_ts
            cache.set(REDIS_USER_TIMESTAMPS + user_name, "%d,%d" %
                      (user_timestamps[user_name][0], user_timestamps[user_name][1]), time=0)
        except KeyError:
            # Users with no listens have no entry in user_timestamps; leave
            # their "0,0" reset value in place.
            pass