def _get_underground_trending(args, strategy):
    """Return the underground trending tracks, populated and with owners attached.

    The unpopulated ``(tracks, track_ids)`` pair is read through
    ``use_redis_cache`` under a key derived from ``strategy.version``. The id
    list is optionally paginated, the tracks are populated with metadata,
    re-ordered to follow ``track_ids`` and finally mapped through
    ``extend_track``.

    Args:
        args: Mapping of request arguments. Reads ``current_user_id``,
            ``limit`` and ``offset``. Pagination is applied only when both
            ``limit`` and ``offset`` are not ``None``.
        strategy: Trending strategy; ``strategy.version`` selects the cache
            key and the object is forwarded to ``make_get_unpopulated_tracks``.

    Returns:
        A list with the result of ``extend_track`` for every selected track,
        in the order given by ``track_ids``.
    """
    db = get_db_read_replica()
    with db.scoped_session() as session:
        current_user_id = args.get("current_user_id", None)
        limit, offset = args.get("limit"), args.get("offset")
        key = make_underground_trending_cache_key(strategy.version)

        (tracks, track_ids) = use_redis_cache(
            key, None, make_get_unpopulated_tracks(session, redis, strategy))

        # Apply limit + offset early to reduce the amount of
        # population work we have to do
        if limit is not None and offset is not None:
            track_ids = track_ids[offset:limit + offset]

        tracks = populate_track_metadata(session, track_ids, tracks,
                                         current_user_id)

        tracks_map = {track['track_id']: track for track in tracks}

        # Re-sort the populated tracks b/c it loses sort order in sql query
        sorted_tracks = [tracks_map[track_id] for track_id in track_ids]
        user_id_list = get_users_ids(sorted_tracks)
        users = get_users_by_id(session, user_id_list, current_user_id)
        for track in sorted_tracks:
            # .get() so that a missing owner is skipped by the guard below
            # instead of raising KeyError before the guard is reached.
            user = users.get(track['owner_id'])
            if user:
                track['user'] = user
        return list(map(extend_track, sorted_tracks))
Example #2
0
def get_trending_tracks(args, strategy):
    """Gets trending by getting the currently cached tracks and then populating them.

    Args:
        args: Mapping of request arguments. Reads ``current_user_id``,
            ``genre``, ``time`` (defaults to ``"week"``; any value other than
            ``"week"``, ``"month"`` or ``"year"`` is treated as ``"week"``)
            and ``with_users``.
        strategy: Trending strategy; ``strategy.version`` is part of the cache
            key and the object is forwarded to
            ``make_generate_unpopulated_trending``.

    Returns:
        The populated tracks, ordered like the cached ``track_ids``. When
        ``with_users`` is truthy, every track whose owner was found also
        carries that owner under the ``user`` key.
    """
    db = get_db_read_replica()
    with db.scoped_session() as session:
        current_user_id, genre, time = args.get("current_user_id"), args.get(
            "genre"), args.get("time", "week")
        time_range = "week" if time not in ["week", "month", "year"] else time
        key = make_trending_cache_key(time_range, genre, strategy.version)

        # Will try to hit cached trending from task, falling back
        # to generating it here if necessary and storing it with no TTL
        (tracks, track_ids) = use_redis_cache(
            key, None,
            make_generate_unpopulated_trending(session, genre, time_range,
                                               strategy))

        # populate track metadata
        tracks = populate_track_metadata(session, track_ids, tracks,
                                         current_user_id)
        tracks_map = {track['track_id']: track for track in tracks}

        # Re-sort the populated tracks b/c it loses sort order in sql query
        sorted_tracks = [tracks_map[track_id] for track_id in track_ids]

        if args.get("with_users", False):
            user_id_list = get_users_ids(sorted_tracks)
            users = get_users_by_id(session, user_id_list, current_user_id)
            for track in sorted_tracks:
                # .get() so that a missing owner is skipped by the guard
                # below instead of raising KeyError.
                user = users.get(track['owner_id'])
                if user:
                    track['user'] = user
        return sorted_tracks
def _get_underground_trending_with_session(
    session: Session,
    args: GetUndergroundTrendingTrackcArgs,
    strategy,
    use_request_context=True,
):
    """Return the underground trending tracks using an already open session.

    The unpopulated ``(tracks, track_ids)`` pair is read through
    ``use_redis_cache`` under a key derived from ``strategy.version``. The id
    list is optionally paginated, the tracks are populated with metadata,
    re-ordered to follow ``track_ids`` and finally mapped through
    ``extend_track``.

    Args:
        session: Database session used for every query in this function.
        args: Mapping of request arguments. Reads ``current_user_id``,
            ``limit`` and ``offset``. Pagination is applied only when both
            ``limit`` and ``offset`` are not ``None``.
        strategy: Trending strategy; ``strategy.version`` selects the cache
            key and the object is forwarded to ``make_get_unpopulated_tracks``.
        use_request_context: Forwarded unchanged to ``get_users_by_id``.

    Returns:
        A list with the result of ``extend_track`` for every selected track,
        in the order given by ``track_ids``.
    """
    current_user_id = args.get("current_user_id", None)
    limit, offset = args.get("limit"), args.get("offset")
    key = make_underground_trending_cache_key(strategy.version)

    (tracks, track_ids) = use_redis_cache(
        key, None, make_get_unpopulated_tracks(session, redis_conn, strategy)
    )

    # Apply limit + offset early to reduce the amount of
    # population work we have to do
    if limit is not None and offset is not None:
        track_ids = track_ids[offset : limit + offset]

    tracks = populate_track_metadata(session, track_ids, tracks, current_user_id)

    tracks_map = {track["track_id"]: track for track in tracks}

    # Re-sort the populated tracks b/c it loses sort order in sql query
    sorted_tracks = [tracks_map[track_id] for track_id in track_ids]
    user_id_list = get_users_ids(sorted_tracks)
    users = get_users_by_id(session, user_id_list, current_user_id, use_request_context)
    for track in sorted_tracks:
        # .get() so that a missing owner is skipped by the guard below
        # instead of raising KeyError before the guard is reached.
        user = users.get(track["owner_id"])
        if user:
            track["user"] = user
    return list(map(extend_track, sorted_tracks))
def get_top_followee_windowed(type, window, args):
    """Return recent tracks by the user's followees, most reposted/saved first.

    Tracks are restricted to current, non-deleted, listed, non-stem tracks
    owned by users that ``args["user_id"]`` follows and created within the
    last ``window``. They are ordered by ``repost_count + save_count``
    descending, with ``track_id`` descending as tie-breaker.

    Args:
        type: Kind of item to query; only ``"track"`` is accepted.
        window: One of ``"week"``, ``"month"`` or ``"year"``.
        args: Mapping of request arguments. Reads ``limit`` (default 25),
            ``user_id`` (the follower whose followees are considered) and
            ``with_users``.

    Returns:
        A list of populated tracks. When ``with_users`` is truthy, every track
        whose owner was found also carries that owner under the ``user`` key.

    Raises:
        exceptions.ArgumentError: If ``type`` is not ``"track"`` or ``window``
            is not one of the accepted values.
    """
    if type != "track":
        raise exceptions.ArgumentError(
            "Invalid type provided, must be one of 'track'")

    valid_windows = ["week", "month", "year"]
    # A falsy window (None, "") is not in the list either, so a single
    # membership test covers both cases.
    if window not in valid_windows:
        raise exceptions.ArgumentError(
            f"Invalid window provided, must be one of {valid_windows}")

    limit = args.get("limit", 25)

    current_user_id = args.get("user_id")
    db = get_db_read_replica()
    with db.scoped_session() as session:

        followee_user_ids = session.query(Follow.followee_user_id).filter(
            Follow.follower_user_id == current_user_id,
            Follow.is_current == True,
            Follow.is_delete == False,
        )
        followee_user_ids_subquery = followee_user_ids.subquery()

        # Queries for tracks joined against followed users and counts
        tracks_query = (
            session.query(Track)
            .join(
                followee_user_ids_subquery,
                Track.owner_id == followee_user_ids_subquery.c.followee_user_id,
            )
            .join(AggregateTrack, Track.track_id == AggregateTrack.track_id)
            .filter(
                Track.is_current == True,
                Track.is_delete == False,
                Track.is_unlisted == False,
                Track.stem_of == None,
                # Query only tracks created `window` time ago (week, month, etc.)
                # `window` is interpolated into raw SQL; this is only safe
                # because it was checked against `valid_windows` above.
                Track.created_at >= text(f"NOW() - interval '1 {window}'"),
            )
            .order_by(
                desc(AggregateTrack.repost_count + AggregateTrack.save_count),
                desc(Track.track_id),
            )
            .limit(limit)
        )

        tracks_query_results = tracks_query.all()
        tracks = helpers.query_result_to_list(tracks_query_results)
        track_ids = [track["track_id"] for track in tracks]

        # Bundle peripheral info into track results
        tracks = populate_track_metadata(session, track_ids, tracks,
                                         current_user_id)

        if args.get("with_users", False):
            user_id_list = get_users_ids(tracks)
            users = get_users_by_id(session, user_id_list)
            for track in tracks:
                # .get() so that a missing owner is skipped by the guard
                # below instead of raising KeyError.
                user = users.get(track["owner_id"])
                if user:
                    track["user"] = user

    return tracks
def get_top_followee_saves(saveType, args):
    """Return the tracks most saved by the current user's followees.

    A subquery counts current, non-deleted saves of ``saveType`` made by the
    followees of the current user, keeps the ``limit`` most-saved item ids,
    and the matching current, non-deleted, listed, non-stem tracks are then
    loaded and populated.

    Args:
        saveType: Kind of saved item; only ``'track'`` is accepted.
        args: Mapping of request arguments. Reads ``limit`` (default 25) and
            ``with_users``.

    Returns:
        A list of populated tracks, queried by followee save count
        descending (ties broken by ``track_id`` descending). When
        ``with_users`` is truthy, every track whose owner was found also
        carries that owner under the ``user`` key.

    Raises:
        exceptions.ArgumentError: If ``saveType`` is not ``'track'``.
    """
    if saveType != 'track':
        raise exceptions.ArgumentError(
            "Invalid type provided, must be one of 'track'")

    limit = args.get('limit', 25)

    current_user_id = get_current_user_id()
    db = get_db_read_replica()
    with db.scoped_session() as session:
        # Construct a subquery of all followees
        followee_user_ids = (session.query(Follow.followee_user_id).filter(
            Follow.follower_user_id == current_user_id,
            Follow.is_current == True, Follow.is_delete == False))
        followee_user_ids_subquery = followee_user_ids.subquery()

        # Construct a subquery of all saves from followees aggregated by id
        save_count = (
            session.query(
                Save.save_item_id,
                func.count(Save.save_item_id).label(
                    response_name_constants.save_count),
            )
            .join(
                followee_user_ids_subquery,
                Save.user_id == followee_user_ids_subquery.c.followee_user_id,
            )
            .filter(
                Save.is_current == True,
                Save.is_delete == False,
                Save.save_type == saveType,
            )
            .group_by(Save.save_item_id)
            .order_by(desc(response_name_constants.save_count))
            .limit(limit)
        )
        save_count_subquery = save_count.subquery()

        # Query for tracks joined against followee save counts.
        # The ordering inside the subquery only decides which rows survive the
        # LIMIT; the outer query needs its own ORDER BY, otherwise the row
        # order of the result is unspecified.
        tracks_query = (
            session.query(Track)
            .join(
                save_count_subquery,
                Track.track_id == save_count_subquery.c.save_item_id,
            )
            .filter(
                Track.is_current == True,
                Track.is_delete == False,
                Track.is_unlisted == False,
                Track.stem_of == None,
            )
            .order_by(
                desc(save_count_subquery.c[response_name_constants.save_count]),
                desc(Track.track_id),
            )
        )

        tracks_query_results = tracks_query.all()
        tracks = helpers.query_result_to_list(tracks_query_results)
        track_ids = [track['track_id'] for track in tracks]

        # bundle peripheral info into track results
        tracks = populate_track_metadata(session, track_ids, tracks,
                                         current_user_id)

        if args.get('with_users', False):
            user_id_list = get_users_ids(tracks)
            users = get_users_by_id(session, user_id_list)
            for track in tracks:
                # .get() so that a missing owner is skipped by the guard
                # below instead of raising KeyError.
                user = users.get(track['owner_id'])
                if user:
                    track['user'] = user

    return tracks
Example #6
0
def get_trending_tracks(args):
    """Return trending tracks, scored, cached and populated with metadata.

    The scored, unpopulated track list is obtained through ``use_redis_cache``
    (falling back to ``generate_trending`` + ``z`` scoring on a cache miss),
    then populated and re-ordered to follow the scored id list.

    Args:
        args: Mapping of request arguments. Reads ``limit``, ``offset``,
            ``current_user_id``, ``time`` (passed to ``generate_trending`` as
            ``None`` unless it is ``"day"``, ``"week"``, ``"month"`` or
            ``"year"``), ``genre`` and ``with_users``.

    Returns:
        The populated tracks ordered by descending score. When ``with_users``
        is truthy, every track whose owner was found also carries that owner
        under the ``user`` key.
    """
    limit, offset, current_user_id = args.get("limit"), args.get(
        "offset"), args.get("current_user_id")

    db = get_db_read_replica()

    time = args.get('time')
    query_time = None if time not in ["day", "week", "month", "year"] else time

    with db.scoped_session() as session:

        def get_unpopulated_trending():
            """Generate, score and sort trending, returning (tracks, track_ids)."""
            trending_tracks = generate_trending(session, query_time,
                                                args.get('genre', None), limit,
                                                offset)

            # NOTE(review): scoring receives the raw `time`, not the
            # validated `query_time` - confirm this is intended.
            track_scores = [
                z(time, track) for track in trending_tracks['listen_counts']
            ]
            sorted_track_scores = sorted(track_scores,
                                         key=lambda k: k['score'],
                                         reverse=True)

            track_ids = [track['track_id'] for track in sorted_track_scores]

            tracks = get_unpopulated_tracks(session, track_ids)
            return (tracks, track_ids)

        # get scored trending tracks, either
        # through cached redis value, or through `get_unpopulated_trending`
        # NOTE(review): the key is built from the request path, genre and
        # time only, while the generator also depends on limit/offset -
        # confirm pagination cannot serve a stale page from the cache.
        cache_keys = {"genre": args.get("genre"), "time": args.get("time")}
        key = extract_key(f"generated-trending:{request.path}",
                          cache_keys.items())
        (tracks, track_ids) = use_redis_cache(key, SCORES_CACHE_DURATION_SEC,
                                              get_unpopulated_trending)

        # populate track metadata
        tracks = populate_track_metadata(session, track_ids, tracks,
                                         current_user_id)
        tracks_map = {track['track_id']: track for track in tracks}

        # Re-sort the populated tracks b/c it loses sort order in sql query
        sorted_tracks = [tracks_map[track_id] for track_id in track_ids]

        if args.get("with_users", False):
            user_id_list = get_users_ids(sorted_tracks)
            users = get_users_by_id(session, user_id_list, current_user_id)
            for track in sorted_tracks:
                # .get() so that a missing owner is skipped by the guard
                # below instead of raising KeyError.
                user = users.get(track['owner_id'])
                if user:
                    track['user'] = user
        return sorted_tracks
def add_users(session, results):
    """Attach the owning user to every entry of ``results`` in place.

    The owner id of an entry is taken from ``playlist_owner_id`` when that key
    is present, otherwise from ``owner_id``. Entries with neither key, or
    whose selected owner id is ``None``, are left untouched.

    Args:
        session: Database session forwarded to ``get_users_by_id``.
        results: List of dict-like entries (tracks and/or playlists).

    Returns:
        The same ``results`` list, with a ``"user"`` key set on every entry
        that has an owner id.
    """
    users_by_id = get_users_by_id(session, get_users_ids(results))
    owner_keys = ('playlist_owner_id', 'owner_id')
    for entry in results:
        # First owner key present on the entry wins; None when neither exists.
        owner_id = next(
            (entry[key] for key in owner_keys if key in entry), None)
        if owner_id is None:
            continue
        entry["user"] = users_by_id[owner_id]
    return results
def get_save_tracks(args):
    """Return the tracks saved by a user, most recently saved first.

    Args:
        args: Mapping of request arguments. Reads ``user_id`` (whose saves
            are listed), ``current_user_id`` (used when populating metadata),
            ``limit``, ``offset``, ``filter_deleted`` (when truthy, deleted
            tracks are excluded) and ``with_users``.

    Returns:
        A list of populated tracks ordered by save time descending, then
        ``track_id`` descending. Every track carries the save time under
        ``response_name_constants.activity_timestamp``. When ``with_users``
        is truthy, every track whose owner was found also carries that owner
        under the ``user`` key. An empty list is returned when the query
        yields no rows.
    """
    user_id = args.get("user_id")
    current_user_id = args.get("current_user_id")
    limit = args.get("limit")
    offset = args.get("offset")
    filter_deleted = args.get("filter_deleted")

    db = get_db_read_replica()
    with db.scoped_session() as session:
        base_query = (
            session.query(Track, Save.created_at)
            .join(Save, Save.save_item_id == Track.track_id)
            .filter(
                Track.is_current == True,
                Save.user_id == user_id,
                Save.is_current == True,
                Save.is_delete == False,
                Save.save_type == SaveType.track,
            )
        )

        # Allow filtering of deletes
        if filter_deleted:
            base_query = base_query.filter(Track.is_delete == False)

        base_query = base_query.order_by(Save.created_at.desc(),
                                         Track.track_id.desc())

        query_results = add_query_pagination(base_query, limit, offset).all()

        # zip(*...) below cannot unpack into two names on an empty result.
        if not query_results:
            return []

        tracks, save_dates = zip(*query_results)
        tracks = helpers.query_result_to_list(tracks)
        track_ids = [track["track_id"] for track in tracks]

        # bundle peripheral info into track results
        tracks = populate_track_metadata(session, track_ids, tracks,
                                         current_user_id)

        if args.get("with_users", False):
            user_id_list = get_users_ids(tracks)
            users = get_users_by_id(session, user_id_list, current_user_id)
            for track in tracks:
                # .get() so that a missing owner is skipped by the guard
                # below instead of raising KeyError.
                user = users.get(track["owner_id"])
                if user:
                    track["user"] = user

        # Tracks are still in query order here, so positions line up with
        # the save timestamps unpacked above.
        for idx, track in enumerate(tracks):
            track[response_name_constants.activity_timestamp] = save_dates[idx]

        return tracks
def get_trending_tracks(args):
    """Return trending tracks for the requested time range and genre.

    Trending data from ``generate_trending`` is scored with ``z``, sorted by
    score descending, and the matching current, listed, non-stem tracks are
    loaded and populated with metadata.

    Args:
        args: Mapping of request arguments. Reads ``time`` (``'allTime'`` is
            sent to ``generate_trending`` as ``'millennium'``), ``genre`` and
            ``with_users``. Pagination and the current user come from
            ``get_pagination_vars`` and ``get_current_user_id``.

    Returns:
        The populated tracks ordered by descending score. Ids that the track
        query filtered out are omitted. When ``with_users`` is truthy, every
        track whose owner was found also carries that owner under the
        ``user`` key.
    """
    (limit, offset) = get_pagination_vars()
    current_user_id = get_current_user_id(required=False)

    db = get_db_read_replica()

    time = args.get('time')
    # Identity understands allTime as millennium.
    # TODO: Change this in https://github.com/AudiusProject/audius-protocol/pull/768/files
    query_time = time
    if time == 'allTime':
        query_time = 'millennium'

    with db.scoped_session() as session:
        trending_tracks = generate_trending(get_db_read_replica(), query_time,
                                            args.get('genre', None), limit,
                                            offset)

        track_scores = [
            z(time, track) for track in trending_tracks['listen_counts']
        ]
        sorted_track_scores = sorted(track_scores,
                                     key=lambda k: k['score'],
                                     reverse=True)

        track_ids = [track['track_id'] for track in sorted_track_scores]

        tracks = session.query(Track).filter(
            Track.is_current == True, Track.is_unlisted == False,
            Track.stem_of == None, Track.track_id.in_(track_ids)).all()
        tracks = helpers.query_result_to_list(tracks)

        tracks = populate_track_metadata(session, track_ids, tracks,
                                         current_user_id)
        tracks_map = {track['track_id']: track for track in tracks}

        # Re-sort the populated tracks b/c it loses sort order in sql query.
        # The query above drops non-current, unlisted and stem tracks, so a
        # scored id may have no row; skip it instead of raising KeyError.
        sorted_tracks = [
            tracks_map[track_id] for track_id in track_ids
            if track_id in tracks_map
        ]

        if args.get("with_users", False):
            user_id_list = get_users_ids(sorted_tracks)
            users = get_users_by_id(session, user_id_list)
            for track in sorted_tracks:
                # .get() so that a missing owner is skipped by the guard
                # below instead of raising KeyError.
                user = users.get(track['owner_id'])
                if user:
                    track['user'] = user
        return sorted_tracks
def get_trending_tracks(args):
    """Return trending tracks for the requested time range and genre.

    Trending data from ``generate_trending`` is scored with ``z``, sorted by
    score descending, and the matching current, listed, non-stem tracks are
    loaded and populated with metadata.

    Args:
        args: Mapping of request arguments. Reads ``time``, ``genre`` and
            ``with_users``. Pagination and the current user come from
            ``get_pagination_vars`` and ``get_current_user_id``.

    Returns:
        The populated tracks ordered by descending score. Ids that the track
        query filtered out are omitted. When ``with_users`` is truthy, every
        track whose owner was found also carries that owner under the
        ``user`` key.
    """
    (limit, offset) = get_pagination_vars()
    current_user_id = get_current_user_id(required=False)

    db = get_db_read_replica()
    time = args.get('time')
    with db.scoped_session() as session:
        trending_tracks = generate_trending(get_db_read_replica(), time,
                                            args.get('genre', None), limit,
                                            offset)

        track_scores = [
            z(time, track) for track in trending_tracks['listen_counts']
        ]
        sorted_track_scores = sorted(track_scores,
                                     key=lambda k: k['score'],
                                     reverse=True)

        track_ids = [track['track_id'] for track in sorted_track_scores]

        tracks = session.query(Track).filter(
            Track.is_current == True, Track.is_unlisted == False,
            Track.stem_of == None, Track.track_id.in_(track_ids)).all()
        tracks = helpers.query_result_to_list(tracks)

        tracks = populate_track_metadata(session, track_ids, tracks,
                                         current_user_id)
        tracks_map = {track['track_id']: track for track in tracks}

        # Re-sort the populated tracks b/c it loses sort order in sql query.
        # The query above drops non-current, unlisted and stem tracks, so a
        # scored id may have no row; skip it instead of raising KeyError.
        sorted_tracks = [
            tracks_map[track_id] for track_id in track_ids
            if track_id in tracks_map
        ]

        if args.get("with_users", False):
            user_id_list = get_users_ids(sorted_tracks)
            users = get_users_by_id(session, user_id_list)
            for track in sorted_tracks:
                # .get() so that a missing owner is skipped by the guard
                # below instead of raising KeyError.
                user = users.get(track['owner_id'])
                if user:
                    track['user'] = user
        return sorted_tracks
def _get_trending_tracks_with_session(session: Session,
                                      args: GetTrendingTracksArgs,
                                      strategy: BaseTrendingStrategy):
    """Return cached trending tracks, populated, using an already open session.

    Args:
        session: Database session used for every query in this function.
        args: Mapping of request arguments. Reads ``current_user_id``,
            ``genre``, ``time`` (defaults to ``"week"``; any value other than
            ``"week"``, ``"month"``, ``"year"`` or ``"allTime"`` is treated
            as ``"week"``) and ``with_users``.
        strategy: Trending strategy; ``strategy.version`` is part of the cache
            key and the object is forwarded to
            ``make_generate_unpopulated_trending``.

    Returns:
        The populated tracks, ordered like the cached ``track_ids``. When
        ``with_users`` is truthy, every track whose owner was found also
        carries that owner under the ``user`` key.
    """
    current_user_id, genre, time = (
        args.get("current_user_id"),
        args.get("genre"),
        args.get("time", "week"),
    )
    valid_ranges = ["week", "month", "year", "allTime"]
    time_range = time if time in valid_ranges else "week"
    key = make_trending_cache_key(time_range, genre, strategy.version)

    # Will try to hit cached trending from task, falling back
    # to generating it here if necessary and storing it with no TTL
    (tracks, track_ids) = use_redis_cache(
        key,
        None,
        make_generate_unpopulated_trending(session, genre, time_range,
                                           strategy),
    )

    # populate track metadata
    tracks = populate_track_metadata(session, track_ids, tracks,
                                     current_user_id)
    tracks_map = {track["track_id"]: track for track in tracks}

    # Re-sort the populated tracks b/c it loses sort order in sql query
    sorted_tracks = [tracks_map[track_id] for track_id in track_ids]

    if args.get("with_users", False):
        user_id_list = get_users_ids(sorted_tracks)
        users = get_users_by_id(session, user_id_list, current_user_id)
        for track in sorted_tracks:
            # .get() so that a missing owner is skipped by the guard below
            # instead of raising KeyError.
            user = users.get(track["owner_id"])
            if user:
                track["user"] = user
    return sorted_tracks
Example #12
0
def get_playlists(args):
    """Return playlists matching the request filters, populated with metadata.

    The unpopulated playlists are obtained through ``use_redis_cache`` (keyed
    by ``make_cache_key(args)``), then populated for the current user.

    Args:
        args: Mapping of request arguments. Reads ``current_user_id``,
            ``playlist_id`` (optional list of ids), ``user_id`` (optional
            owner filter) and ``with_users``. Deleted playlists are excluded
            unless ``playlist_id`` is given. Private playlists are only
            returned to their owner.

    Returns:
        A list of populated playlists, newest first. When ``with_users`` is
        truthy, every playlist whose owner was found also carries that owner
        under the ``user`` key. An empty list is returned when
        ``sqlalchemy.orm.exc.NoResultFound`` is raised along the way.

    Raises:
        exceptions.ArgumentError: If building the playlist id filter raises
            ``ValueError``.
    """
    playlists = []
    current_user_id = args.get("current_user_id")

    db = get_db_read_replica()
    with db.scoped_session() as session:

        def get_unpopulated_playlists():
            """Query the matching playlists; returns (playlists, playlist_ids)."""
            playlist_query = (session.query(Playlist).filter(
                Playlist.is_current == True))

            # playlist ids filter if the optional query param is passed in
            if "playlist_id" in args:
                playlist_id_list = args.get("playlist_id")
                try:
                    playlist_query = playlist_query.filter(
                        Playlist.playlist_id.in_(playlist_id_list))
                except ValueError as e:
                    # Chain explicitly so the original error is kept as cause.
                    raise exceptions.ArgumentError(
                        "Invalid value found in playlist id list", e) from e

            if "user_id" in args:
                user_id = args.get("user_id")
                # user id filter if the optional query param is passed in
                playlist_query = playlist_query.filter(
                    Playlist.playlist_owner_id == user_id)

            # If no current_user_id, never show hidden playlists
            if not current_user_id:
                playlist_query = playlist_query.filter(
                    Playlist.is_private == False)

            # Filter out deletes unless we're fetching explicitly by id
            if "playlist_id" not in args:
                playlist_query = playlist_query.filter(
                    Playlist.is_delete == False)

            playlist_query = playlist_query.order_by(desc(Playlist.created_at))
            found = paginate_query(playlist_query).all()
            found = helpers.query_result_to_list(found)

            # if we passed in a current_user_id, filter out all private playlists where
            # the owner_id doesn't match the current_user_id
            if current_user_id:
                found = [
                    playlist for playlist in found
                    if (not playlist["is_private"])
                    or playlist["playlist_owner_id"] == current_user_id
                ]

            # retrieve playlist ids list
            found_ids = [playlist["playlist_id"] for playlist in found]

            return (found, found_ids)

        try:
            # Get unpopulated playlists, either via
            # redis cache or via get_unpopulated_playlists
            key = make_cache_key(args)

            (playlists, playlist_ids) = use_redis_cache(
                key, UNPOPULATED_PLAYLIST_CACHE_DURATION_SEC,
                get_unpopulated_playlists)

            # bundle peripheral info into playlist results
            playlists = populate_playlist_metadata(
                session, playlist_ids, playlists,
                [RepostType.playlist, RepostType.album],
                [SaveType.playlist, SaveType.album], current_user_id)

            if args.get("with_users", False):
                user_id_list = get_users_ids(playlists)
                users = get_users_by_id(session, user_id_list, current_user_id)
                for playlist in playlists:
                    # .get() so that a missing owner is skipped by the guard
                    # below instead of raising KeyError.
                    user = users.get(playlist['playlist_owner_id'])
                    if user:
                        playlist['user'] = user

        except sqlalchemy.orm.exc.NoResultFound:
            # Deliberate best effort: fall through and return whatever
            # `playlists` holds at this point.
            pass
    return playlists
Example #13
0
def get_tracks(args):
    """
    Gets tracks.
    A note on caching strategy:
        - This method is cached at two layers: at the API via the @cache decorator,
        and within this method using the shared get_unpopulated_tracks cache.

        The shared cache only works when fetching via ID, so calls to fetch tracks
        via handle, asc/desc sort, or filtering by block_number won't hit the shared cache.
        These will hit the API cache unless they have a current_user_id included.

    Args:
        args: Mapping of request arguments. Reads ``handle``, ``id``,
            ``min_block_number``, ``sort``, ``user_id``, ``filter_deleted``,
            ``current_user_id`` and ``with_users``. NOTE: this mapping is
            mutated - ``user_id`` is set when ``handle`` is given, and
            ``limit``/``offset`` are set when the shared cache is not used.

    Returns:
        A list of populated tracks. When ``with_users`` is truthy, every track
        whose owner was found also carries that owner under the ``user`` key.
    """
    tracks = []

    db = get_db_read_replica()
    with db.scoped_session() as session:

        def get_tracks_and_ids():
            """Fetch unpopulated tracks; returns (tracks, track_ids)."""
            if "handle" in args:
                handle = args.get("handle")
                user_row = session.query(User.user_id).filter(
                    User.handle_lc == handle.lower()).first()
                # .first() yields a one-column row (or None); store the scalar
                # id so downstream filters compare against a plain value, the
                # same shape as a caller-supplied `user_id`.
                args["user_id"] = user_row[0] if user_row else None

            can_use_shared_cache = ("id" in args
                                    and "min_block_number" not in args
                                    and "sort" not in args
                                    and "user_id" not in args)

            if can_use_shared_cache:
                should_filter_deleted = args.get("filter_deleted", False)
                found = get_unpopulated_tracks(session, args["id"],
                                               should_filter_deleted)
            else:
                (limit, offset) = get_pagination_vars()
                args["limit"] = limit
                args["offset"] = offset
                found = _get_tracks(session, args)

            found_ids = [track["track_id"] for track in found]
            return (found, found_ids)

        (tracks, track_ids) = get_tracks_and_ids()

        # bundle peripheral info into track results
        current_user_id = args.get("current_user_id")
        tracks = populate_track_metadata(session, track_ids, tracks,
                                         current_user_id)

        if args.get("with_users", False):
            user_id_list = get_users_ids(tracks)
            users = get_users_by_id(session, user_id_list, current_user_id)
            for track in tracks:
                # .get() so that a missing owner is skipped by the guard
                # below instead of raising KeyError.
                user = users.get(track['owner_id'])
                if user:
                    track['user'] = user

    return tracks
Example #14
0
def get_repost_feed_for_user(user_id, args):
    feed_results = {}
    db = get_db_read_replica()
    with db.scoped_session() as session:
        # query all reposts by user
        repost_query = (session.query(Repost).filter(
            Repost.is_current == True, Repost.is_delete == False,
            Repost.user_id == user_id).order_by(desc(Repost.created_at),
                                                desc(Repost.repost_item_id),
                                                desc(Repost.repost_type)))

        reposts = paginate_query(repost_query).all()

        # get track reposts from above
        track_reposts = [
            r for r in reposts if r.repost_type == RepostType.track
        ]

        # get reposted track ids
        repost_track_ids = [r.repost_item_id for r in track_reposts]

        # get playlist reposts from above
        playlist_reposts = [
            r for r in reposts if r.repost_type == RepostType.playlist
            or r.repost_type == RepostType.album
        ]

        # get reposted playlist ids
        repost_playlist_ids = [r.repost_item_id for r in playlist_reposts]

        track_reposts = helpers.query_result_to_list(track_reposts)
        playlist_reposts = helpers.query_result_to_list(playlist_reposts)

        # build track/playlist id --> repost dict from repost lists
        track_repost_dict = {
            repost["repost_item_id"]: repost
            for repost in track_reposts
        }
        playlist_repost_dict = {
            repost["repost_item_id"]: repost
            for repost in playlist_reposts
        }

        # query tracks for repost_track_ids
        track_query = (session.query(Track).filter(
            Track.is_current == True, Track.is_delete == False,
            Track.is_unlisted == False, Track.stem_of == None,
            Track.track_id.in_(repost_track_ids)).order_by(
                desc(Track.created_at)))
        tracks = track_query.all()
        tracks = helpers.query_result_to_list(tracks)

        # get track ids
        track_ids = [track["track_id"] for track in tracks]

        # query playlists for repost_playlist_ids
        playlist_query = (session.query(Playlist).filter(
            Playlist.is_current == True, Playlist.is_delete == False,
            Playlist.is_private == False,
            Playlist.playlist_id.in_(repost_playlist_ids)).order_by(
                desc(Playlist.created_at)))
        playlists = playlist_query.all()
        playlists = helpers.query_result_to_list(playlists)

        # get playlist ids
        playlist_ids = [playlist["playlist_id"] for playlist in playlists]

        # get repost counts by track and playlist IDs
        repost_counts = get_repost_counts(session, False, True,
                                          track_ids + playlist_ids, None)
        track_repost_counts = {
            repost_item_id: repost_count
            for (repost_item_id, repost_count, repost_type) in repost_counts
            if repost_type == RepostType.track
        }
        playlist_repost_counts = {
            repost_item_id: repost_count
            for (repost_item_id, repost_count, repost_type) in repost_counts
            if repost_type in (RepostType.playlist, RepostType.album)
        }

        # get save counts for tracks and playlists
        save_counts = get_save_counts(session, False, True,
                                      track_ids + playlist_ids, None)
        track_save_counts = {
            save_item_id: save_count
            for (save_item_id, save_count, save_type) in save_counts
            if save_type == SaveType.track
        }
        playlist_save_counts = {
            save_item_id: save_count
            for (save_item_id, save_count, save_type) in save_counts
            if save_type in (SaveType.playlist, SaveType.album)
        }

        current_user_id = get_current_user_id(required=False)
        requested_user_is_current_user = False
        user_reposted_track_ids = {}
        user_reposted_playlist_ids = {}
        user_saved_track_dict = {}
        user_saved_playlist_dict = {}
        followees_track_repost_dict = {}
        followees_playlist_repost_dict = {}
        if current_user_id:
            # if current user = user_id, skip current_user_reposted queries and default to true
            if current_user_id == user_id:
                requested_user_is_current_user = True
            else:
                user_reposted_query = (session.query(
                    Repost.repost_item_id, Repost.repost_type).filter(
                        Repost.is_current == True, Repost.is_delete == False,
                        Repost.user_id == current_user_id,
                        or_(Repost.repost_item_id.in_(track_ids),
                            Repost.repost_item_id.in_(playlist_ids))).all())

                # generate dictionary of track id --> current user reposted status
                user_reposted_track_ids = {
                    r[0]: True
                    for r in user_reposted_query if r[1] == RepostType.track
                }

                # generate dictionary of playlist id --> current user reposted status
                user_reposted_playlist_ids = {
                    r[0]: True
                    for r in user_reposted_query
                    if r[1] == RepostType.album or r[1] == RepostType.playlist
                }

            # build dict of tracks and playlists that current user has saved

            #   - query saves by current user from relevant tracks/playlists
            user_saved_query = (session.query(
                Save.save_item_id, Save.save_type).filter(
                    Save.is_current == True, Save.is_delete == False,
                    Save.user_id == current_user_id,
                    or_(Save.save_item_id.in_(track_ids),
                        Save.save_item_id.in_(playlist_ids))).all())
            #   - build dict of track id --> current user save status
            user_saved_track_dict = {
                save[0]: True
                for save in user_saved_query if save[1] == SaveType.track
            }
            #   - build dict of playlist id --> current user save status
            user_saved_playlist_dict = {
                save[0]: True
                for save in user_saved_query
                if save[1] == SaveType.playlist or save[1] == SaveType.album
            }

            # query current user's followees
            followee_user_ids = (session.query(Follow.followee_user_id).filter(
                Follow.follower_user_id == current_user_id,
                Follow.is_current == True, Follow.is_delete == False).all())
            followee_user_ids = [f[0] for f in followee_user_ids]

            # query all followees' reposts
            followee_repost_query = (session.query(Repost).filter(
                Repost.is_current == True, Repost.is_delete == False,
                Repost.user_id.in_(followee_user_ids),
                or_(Repost.repost_item_id.in_(repost_track_ids),
                    Repost.repost_item_id.in_(repost_playlist_ids))).order_by(
                        desc(Repost.created_at)))
            followee_reposts = paginate_query(followee_repost_query).all()
            followee_reposts = helpers.query_result_to_list(followee_reposts)

            # build dict of track id --> reposts from followee track reposts
            for repost in followee_reposts:
                if repost["repost_type"] == RepostType.track:
                    if repost[
                            "repost_item_id"] not in followees_track_repost_dict:
                        followees_track_repost_dict[
                            repost["repost_item_id"]] = []
                    followees_track_repost_dict[
                        repost["repost_item_id"]].append(repost)

            # build dict of playlist id --> reposts from followee playlist reposts
            for repost in followee_reposts:
                if (repost["repost_type"] == RepostType.playlist
                        or repost["repost_type"] == RepostType.album):
                    if repost[
                            "repost_item_id"] not in followees_playlist_repost_dict:
                        followees_playlist_repost_dict[
                            repost["repost_item_id"]] = []
                    followees_playlist_repost_dict[
                        repost["repost_item_id"]].append(repost)

        # populate metadata for track entries
        for track in tracks:
            track[response_name_constants.
                  repost_count] = track_repost_counts.get(
                      track["track_id"], 0)
            track[response_name_constants.save_count] = track_save_counts.get(
                track["track_id"], 0)
            track[response_name_constants.has_current_user_reposted] = (
                True if requested_user_is_current_user else
                user_reposted_track_ids.get(track["track_id"], False))
            track[response_name_constants.
                  has_current_user_saved] = user_saved_track_dict.get(
                      track["track_id"], False)
            track[response_name_constants.
                  followee_reposts] = followees_track_repost_dict.get(
                      track["track_id"], [])
            track[response_name_constants.
                  activity_timestamp] = track_repost_dict[
                      track["track_id"]]["created_at"]

        for playlist in playlists:
            playlist[response_name_constants.
                     repost_count] = playlist_repost_counts.get(
                         playlist["playlist_id"], 0)
            playlist[
                response_name_constants.save_count] = playlist_save_counts.get(
                    playlist["playlist_id"], 0)
            playlist[response_name_constants.has_current_user_reposted] = (
                True if requested_user_is_current_user else
                user_reposted_playlist_ids.get(playlist["playlist_id"], False))
            playlist[response_name_constants.has_current_user_saved] = \
                user_saved_playlist_dict.get(playlist["playlist_id"], False)
            playlist[response_name_constants.followee_reposts] = \
                followees_playlist_repost_dict.get(playlist["playlist_id"], [])
            playlist[response_name_constants.activity_timestamp] = \
                playlist_repost_dict[playlist["playlist_id"]]["created_at"]

        unsorted_feed = tracks + playlists

        # sort feed by repost timestamp desc
        feed_results = sorted(unsorted_feed,
                              key=lambda entry: entry[response_name_constants.
                                                      activity_timestamp],
                              reverse=True)

        if args.get("with_users", False):
            user_id_list = get_users_ids(feed_results)
            users = get_users_by_id(session, user_id_list)
            for result in feed_results:
                if 'playlist_owner_id' in result:
                    user = users[result['playlist_owner_id']]
                    if user:
                        result['user'] = user
                elif 'owner_id' in result:
                    user = users[result['owner_id']]
                    if user:
                        result['user'] = user

    return feed_results
Example #15
0
def get_feed(args):
    """Build the current user's feed from followee creations and/or reposts.

    Args:
        args: Mapping of request arguments. Keys read here:
            "filter": "original" (items created by followees), "repost"
                (items reposted by followees) or "all" (both). Any other
                value skips both fetch branches and then fails when the
                results are combined, so callers must pass one of the three.
            "tracks_only": when truthy, playlists are left out of the feed.
            "with_users": when present and not the string 'false', every
                feed entry gets its owner attached under 'user'.

    Returns:
        A list of track and playlist dicts, each carrying an
        activity_timestamp, sorted by that timestamp descending and cut to
        the limit from get_pagination_vars(). If a followee playlist
        references a track that get_unpopulated_tracks did not return, the
        result of api_helpers.error_response(...) is returned instead.
    """
    feed_results = []
    db = get_db_read_replica()

    feed_filter = args.get("filter")
    # Allow for fetching only tracks
    tracks_only = args.get('tracks_only', False)

    # Current user - user for whom feed is being generated
    current_user_id = get_current_user_id()
    with db.scoped_session() as session:
        # Generate list of users followed by current user, i.e. 'followees'.
        # NOTE: the `== True` / `== False` / `== None` comparisons in this
        # function build SQL expressions and must not be rewritten with `is`.
        followee_rows = (session.query(Follow.followee_user_id).filter(
            Follow.follower_user_id == current_user_id,
            Follow.is_current == True, Follow.is_delete == False).all())
        followee_user_ids = [row[0] for row in followee_rows]
        # Set copy for the per-entry membership tests further down; the list
        # is still what gets handed to the SQL in_() clauses.
        followee_id_set = set(followee_user_ids)

        # Fetch followee creations if requested
        if feed_filter in ["original", "all"]:
            if not tracks_only:
                # Query playlists posted by followees, sorted and paginated by created_at desc
                created_playlists_query = (session.query(Playlist).filter(
                    Playlist.is_current == True, Playlist.is_delete == False,
                    Playlist.is_private == False,
                    Playlist.playlist_owner_id.in_(
                        followee_user_ids)).order_by(desc(
                            Playlist.created_at)))
                # NOTE(review): the False argument presumably turns off the
                # offset so only the limit applies - confirm against
                # paginate_query.
                created_playlists = paginate_query(created_playlists_query,
                                                   False).all()

                # get track ids for all tracks in playlists
                playlist_track_ids = set()
                for playlist in created_playlists:
                    for track_entry in playlist.playlist_contents["track_ids"]:
                        playlist_track_ids.add(track_entry["track"])

                # get all track objects for track ids
                playlist_tracks = get_unpopulated_tracks(
                    session, playlist_track_ids)
                playlist_tracks_dict = {
                    track["track_id"]: track
                    for track in playlist_tracks
                }

                # get all track ids that have same owner as playlist and created in "same action"
                # "same action": track created within [x time] before playlist creation
                max_timedelta = datetime.timedelta(
                    minutes=trackDedupeMaxMinutes)
                tracks_to_dedupe = set()
                for playlist in created_playlists:
                    for track_entry in playlist.playlist_contents["track_ids"]:
                        track = playlist_tracks_dict.get(track_entry["track"])
                        if not track:
                            return api_helpers.error_response(
                                "Something caused the server to crash.")
                        same_owner = (
                            track["owner_id"] == playlist.playlist_owner_id)
                        if (same_owner
                                and track["created_at"] <= playlist.created_at
                                and (playlist.created_at - track["created_at"]
                                     <= max_timedelta)):
                            tracks_to_dedupe.add(track["track_id"])
                tracks_to_dedupe = list(tracks_to_dedupe)
            else:
                # No playlists to consider
                tracks_to_dedupe = []
                created_playlists = []

            # Query tracks posted by followees, sorted & paginated by created_at desc
            # exclude tracks that were posted in "same action" as playlist
            created_tracks_query = (session.query(Track).filter(
                Track.is_current == True, Track.is_delete == False,
                Track.is_unlisted == False, Track.stem_of == None,
                Track.owner_id.in_(followee_user_ids),
                Track.track_id.notin_(tracks_to_dedupe)).order_by(
                    desc(Track.created_at)))
            created_tracks = paginate_query(created_tracks_query, False).all()

            # extract created_track_ids and created_playlist_ids
            created_track_ids = [track.track_id for track in created_tracks]
            created_playlist_ids = [
                playlist.playlist_id for playlist in created_playlists
            ]

        # Fetch followee reposts if requested
        if feed_filter in ["repost", "all"]:
            # query items reposted by followees, sorted by oldest followee repost of item;
            # paginated by most recent repost timestamp
            repost_subquery = (session.query(Repost).filter(
                Repost.is_current == True, Repost.is_delete == False,
                Repost.user_id.in_(followee_user_ids)))
            # exclude items also created by followees to guarantee order determinism, in case of "all" filter
            if feed_filter == "all":
                repost_subquery = (repost_subquery.filter(
                    or_(
                        and_(Repost.repost_type == RepostType.track,
                             Repost.repost_item_id.notin_(created_track_ids)),
                        and_(
                            Repost.repost_type != RepostType.track,
                            Repost.repost_item_id.notin_(
                                created_playlist_ids)))))
            repost_subquery = repost_subquery.subquery()

            repost_query = (session.query(
                repost_subquery.c.repost_item_id,
                repost_subquery.c.repost_type,
                func.min(repost_subquery.c.created_at).label(
                    "min_created_at")).group_by(
                        repost_subquery.c.repost_item_id,
                        repost_subquery.c.repost_type).order_by(
                            desc("min_created_at")))
            followee_reposts = paginate_query(repost_query, False).all()

            # build dict of track_id / playlist_id -> oldest followee repost timestamp from followee_reposts above
            track_repost_timestamp_dict = {}
            playlist_repost_timestamp_dict = {}
            for (repost_item_id, repost_type,
                 oldest_followee_repost_timestamp) in followee_reposts:
                if repost_type == RepostType.track:
                    track_repost_timestamp_dict[
                        repost_item_id] = oldest_followee_repost_timestamp
                elif repost_type in (RepostType.playlist, RepostType.album):
                    playlist_repost_timestamp_dict[
                        repost_item_id] = oldest_followee_repost_timestamp

            # extract reposted_track_ids and reposted_playlist_ids
            reposted_track_ids = list(track_repost_timestamp_dict.keys())
            reposted_playlist_ids = list(playlist_repost_timestamp_dict.keys())

            # Query tracks reposted by followees
            reposted_tracks = session.query(Track).filter(
                Track.is_current == True, Track.is_delete == False,
                Track.is_unlisted == False, Track.stem_of == None,
                Track.track_id.in_(reposted_track_ids))
            # exclude tracks already fetched from above, in case of "all" filter
            if feed_filter == "all":
                reposted_tracks = reposted_tracks.filter(
                    Track.track_id.notin_(created_track_ids))
            reposted_tracks = reposted_tracks.order_by(desc(
                Track.created_at)).all()

            if not tracks_only:
                # Query playlists reposted by followees, excluding playlists already fetched from above
                reposted_playlists = session.query(Playlist).filter(
                    Playlist.is_current == True, Playlist.is_delete == False,
                    Playlist.is_private == False,
                    Playlist.playlist_id.in_(reposted_playlist_ids))
                # exclude playlists already fetched from above, in case of "all" filter
                if feed_filter == "all":
                    reposted_playlists = reposted_playlists.filter(
                        Playlist.playlist_id.notin_(created_playlist_ids))
                reposted_playlists = reposted_playlists.order_by(
                    desc(Playlist.created_at)).all()
            else:
                reposted_playlists = []

        if feed_filter == "original":
            tracks_to_process = created_tracks
            playlists_to_process = created_playlists
        elif feed_filter == "repost":
            tracks_to_process = reposted_tracks
            playlists_to_process = reposted_playlists
        else:
            # Reached for "all"; any other filter value fails here because
            # neither fetch branch above assigned these locals.
            tracks_to_process = created_tracks + reposted_tracks
            playlists_to_process = created_playlists + reposted_playlists

        tracks = helpers.query_result_to_list(tracks_to_process)
        playlists = helpers.query_result_to_list(playlists_to_process)

        # define top level feed activity_timestamp to enable sorting
        # activity_timestamp: created_at if item created by followee, else reposted_at
        timestamp_key = response_name_constants.activity_timestamp
        for track in tracks:
            if track["owner_id"] in followee_id_set:
                track[timestamp_key] = track["created_at"]
            else:
                track[timestamp_key] = track_repost_timestamp_dict[
                    track["track_id"]]
        for playlist in playlists:
            if playlist["playlist_owner_id"] in followee_id_set:
                playlist[timestamp_key] = playlist["created_at"]
            else:
                playlist[timestamp_key] = playlist_repost_timestamp_dict[
                    playlist["playlist_id"]]

        # bundle peripheral info into track and playlist objects
        track_ids = [track["track_id"] for track in tracks]
        playlist_ids = [playlist["playlist_id"] for playlist in playlists]
        tracks = populate_track_metadata(session, track_ids, tracks,
                                         current_user_id)
        playlists = populate_playlist_metadata(
            session, playlist_ids, playlists,
            [RepostType.playlist, RepostType.album],
            [SaveType.playlist, SaveType.album], current_user_id)

        # build combined feed of tracks and playlists
        unsorted_feed = tracks + playlists

        # sort feed based on activity_timestamp
        sorted_feed = sorted(unsorted_feed,
                             key=lambda entry: entry[response_name_constants.
                                                     activity_timestamp],
                             reverse=True)

        # truncate feed to requested limit
        (limit, _) = get_pagination_vars()
        feed_results = sorted_feed[0:limit]

        if "with_users" in args and args.get("with_users") != 'false':
            user_id_list = get_users_ids(feed_results)
            users = get_users_by_id(session, user_id_list)
            for result in feed_results:
                # Playlists carry 'playlist_owner_id', tracks 'owner_id';
                # entries with neither key are left without a user.
                if 'playlist_owner_id' in result:
                    owner_id = result['playlist_owner_id']
                elif 'owner_id' in result:
                    owner_id = result['owner_id']
                else:
                    continue
                user = users[owner_id]
                if user:
                    result['user'] = user

    return feed_results
Example #16
0
def get_feed_es(args, limit=10):
    """Assemble a user's feed from Elasticsearch.

    Args:
        args: Mapping of arguments. "user_id" identifies the user the feed
            is built for (stringified before use); "filter" is "repost",
            "original" or "all" and defaults to "all".
        limit: Maximum number of feed entries returned; also used as the
            size of the track and playlist searches.

    Returns:
        Up to `limit` items, each passed through
        populate_track_or_playlist_metadata_es, ordered by the "created_at"
        value used for sorting (newest first). For a repost that value is
        the oldest matching repost timestamp within the last 30 days.
    """
    current_user_id = str(args.get("user_id"))
    feed_filter = args.get("filter", "all")
    want_reposts = feed_filter in ("repost", "all")
    want_originals = feed_filter in ("original", "all")

    # msearch body: alternating header ({"index": ...}) and search entries.
    searches = []

    if want_reposts:
        searches.append({"index": ES_REPOSTS})
        repost_filters = [
            following_ids_terms_lookup(current_user_id, "user_id"),
            {"term": {"is_delete": False}},
            {"range": {"created_at": {"gte": "now-30d"}}},
        ]
        # No hits are requested (size 0); instead reposts are bucketed per
        # item_key with the minimum created_at, over-fetching up to 500
        # buckets so they can be de-duplicated below. This approximates the
        # SQL "min(created_at) ... group by".
        searches.append({
            "query": {"bool": {"must": repost_filters}},
            "size": 0,
            "aggs": {
                "item_key": {
                    "terms": {"field": "item_key", "size": 500},
                    "aggs": {
                        "min_created_at": {"min": {"field": "created_at"}},
                    },
                },
            },
        })

    if want_originals:
        searches.append({"index": ES_TRACKS})
        track_filters = [
            following_ids_terms_lookup(current_user_id, "owner_id"),
            {"term": {"is_unlisted": False}},
            {"term": {"is_delete": False}},
        ]
        searches.append({
            "query": {
                "bool": {
                    "must": track_filters,
                    "must_not": [{"exists": {"field": "stem_of"}}],
                },
            },
            "size": limit,
            "sort": {"created_at": "desc"},
        })

        searches.append({"index": ES_PLAYLISTS})
        playlist_filters = [
            following_ids_terms_lookup(current_user_id, "playlist_owner_id"),
            {"term": {"is_private": False}},
            {"term": {"is_delete": False}},
        ]
        searches.append({
            "query": {"bool": {"must": playlist_filters}},
            "size": limit,
            "sort": {"created_at": "desc"},
        })

    repost_buckets = []
    tracks = []
    playlists = []

    found = esclient.msearch(searches=searches)

    # Responses come back in request order, so consume them front to back.
    if want_reposts:
        repost_response = found["responses"].pop(0)
        repost_buckets = repost_response["aggregations"]["item_key"]["buckets"]
        for bucket in repost_buckets:
            # Give each bucket the same sort / identity fields the track and
            # playlist documents have.
            bucket["created_at"] = bucket["min_created_at"]["value_as_string"]
            bucket["item_key"] = bucket["key"]
        repost_buckets.sort(key=lambda b: b["min_created_at"]["value"])

    if want_originals:
        tracks = pluck_hits(found["responses"].pop(0))
        playlists = pluck_hits(found["responses"].pop(0))

    # Item keys already placed in the feed, used to drop duplicate reposts.
    seen_keys = set()
    candidates = []

    def _register_original(entry):
        # Open question from the original author: should the indexer store
        # item_key on tracks / playlists rather than deriving it here?
        entry["item_key"] = item_key(entry)
        seen_keys.add(entry["item_key"])
        candidates.append(entry)

    # Open question from the original author: should the tracks of a
    # playlist be marked as seen too? get_feed "debounces" those.
    for playlist in playlists:
        _register_original(playlist)

    for track in tracks:
        _register_original(track)

    # Only keep repost buckets for items not already in the feed.
    for bucket in repost_buckets:
        bucket_key = bucket["key"]
        if bucket_key not in seen_keys:
            seen_keys.add(bucket_key)
            candidates.append(bucket)

    # Newest first. Repost buckets are still stubs at this point. The cut is
    # a soft limit (twice the requested size) because some stubs are dropped
    # during hydration below.
    ranked = sorted(
        candidates,
        key=lambda entry: entry["created_at"],
        reverse=True,
    )[:limit * 2]

    # Collect the documents behind the repost stubs; a stub is recognised by
    # its "min_created_at" field.
    docs_to_fetch = []
    for entry in ranked:
        if "min_created_at" not in entry:
            continue
        kind, doc_id = entry["key"].split(":")
        target_index = ES_TRACKS if kind == "track" else ES_PLAYLISTS
        docs_to_fetch.append({"_index": target_index, "_id": doc_id})

    hydrated_by_key = {}
    if docs_to_fetch:
        reposted_docs = esclient.mget(docs=docs_to_fetch)
        for doc in reposted_docs["docs"]:
            if not doc["found"]:
                # Repost of a track / playlist that is not in the index.
                # The original author expects this to be transient (repost
                # indexing ahead of track / playlist indexing).
                continue
            source = doc["_source"]
            source["item_key"] = item_key(source)
            hidden = (source.get("is_delete") or source.get("is_private")
                      or source.get("is_unlisted") or source.get("stem_of"))
            if hidden:
                # Deleted, private, unlisted and stem items are dropped;
                # this is what the soft limit above compensates for.
                continue
            hydrated_by_key[source["item_key"]] = source

    # Swap every repost stub for the item it points at.
    sorted_feed = []
    for entry in ranked:
        if "min_created_at" not in entry:
            entry["activity_timestamp"] = entry["created_at"]
            sorted_feed.append(entry)
            continue
        stub_key = entry["key"]
        if stub_key not in hydrated_by_key:
            # Missing or filtered out during hydration.
            continue
        item = hydrated_by_key[stub_key]
        item["activity_timestamp"] = entry["min_created_at"]["value_as_string"]
        sorted_feed.append(item)

    # Load the item owners plus the current user in one mget.
    user_id_list = [str(owner_id) for owner_id in get_users_ids(sorted_feed)]
    user_id_list.append(current_user_id)
    user_list = esclient.mget(index=ES_USERS, ids=user_id_list)
    user_by_id = {
        doc["_id"]: doc["_source"]
        for doc in user_list["docs"] if doc["found"]
    }

    # The current user is taken out of the map and used as the viewer when
    # populating every remaining user.
    current_user = user_by_id.pop(current_user_id)
    user_by_id = {
        uid: populate_user_metadata_es(user, current_user)
        for uid, user in user_by_id.items()
    }

    for item in sorted_feed:
        # ES ids are strings while the ids stored on items are ints.
        owner_uid = str(item.get("playlist_owner_id", item.get("owner_id")))
        item["user"] = user_by_id[owner_uid]

    # Followee context (saves / reposts) for every item. The original author
    # notes this over-fetches because nothing groups per item, and that an
    # aggregation with top hits would be the better approach.
    item_keys = [item["item_key"] for item in sorted_feed]

    follow_saves, follow_reposts = fetch_followed_saves_and_reposts(
        current_user_id, item_keys, limit * 20)

    for item in sorted_feed:
        item["followee_reposts"] = follow_reposts[item["item_key"]]
        item["followee_saves"] = follow_saves[item["item_key"]]

    # Populate metadata for every item, then apply the hard limit.
    populated_feed = [
        populate_track_or_playlist_metadata_es(item, current_user)
        for item in sorted_feed
    ]

    return populated_feed[:limit]
Example #17
0
def _get_repost_feed_for_user(session: Session, user_id: int,
                              args: GetRepostFeedForUserArgs):
    """Return the tracks and playlists a user has reposted, newest repost first.

    Args:
        session: Database session used for every query.
        user_id: Id of the user whose reposts are listed. Replaced by the id
            looked up from args["handle"] when that key is present.
        args: Mapping of options. Keys read here: "current_user_id" (passed
            to the metadata populate helpers), "limit" and "offset"
            (pagination of the repost query), "handle" (optional,
            lower-cased before the lookup) and "with_users" (when truthy,
            each entry gets its owner attached under "user").

    Returns:
        A list of populated track and playlist dicts, each with an
        activity timestamp equal to the repost's created_at, sorted by that
        timestamp descending.
    """
    current_user_id = args.get("current_user_id")
    limit = args.get("limit")
    offset = args.get("offset")
    if "handle" in args:
        handle = args.get("handle") or ""
        # .first() returns a one-column row (or None when no user matches),
        # not a bare id, and typing.cast does nothing at runtime, so the id
        # has to be unwrapped explicitly before it is compared below.
        user_row = (session.query(User.user_id).filter(
            User.handle_lc == handle.lower()).first())
        # An unknown handle leaves user_id as None, so the repost filter
        # below compares Repost.user_id against None.
        user_id = cast(int, user_row[0] if user_row is not None else None)

    # Query all reposts by a user.
    # Outerjoin both tracks and playlists to collect both
    # so that a single limit/offset pagination does what we intend when tracks or playlists
    # are deleted.
    # NOTE: `== True` / `== False` / `!= None` build SQL expressions here and
    # must not be rewritten with `is`.
    repost_query = (
        session.query(Repost, Track, Playlist).outerjoin(
            Track,
            and_(
                Repost.repost_item_id == Track.track_id,
                Repost.repost_type == "track",
                Track.is_current == True,
                Track.is_delete == False,
                Track.is_unlisted == False,
                Track.stem_of == None,
            ),
        ).outerjoin(
            Playlist,
            and_(
                Repost.repost_item_id == Playlist.playlist_id,
                or_(Repost.repost_type == "playlist",
                    Repost.repost_type == "album"),
                Playlist.is_current == True,
                Playlist.is_delete == False,
                Playlist.is_private == False,
            ),
        ).filter(
            Repost.is_current == True,
            Repost.is_delete == False,
            Repost.user_id == user_id,
            # Drop rows that have no join found for either track or playlist
            or_(Track.track_id != None, Playlist.playlist_id != None),
        ).order_by(
            desc(Repost.created_at),
            desc(Repost.repost_item_id),
            desc(Repost.repost_type),
        ))

    # Each result row is a (Repost, Track-or-None, Playlist-or-None) triple.
    reposts = add_query_pagination(repost_query, limit, offset).all()

    # get track reposts from above
    track_reposts = helpers.query_result_to_list(
        [row[0] for row in reposts if row[1] is not None])

    # get playlist reposts from above
    playlist_reposts = helpers.query_result_to_list(
        [row[0] for row in reposts if row[2] is not None])

    # build track/playlist id --> repost dict from repost lists
    track_repost_dict = {
        repost["repost_item_id"]: repost
        for repost in track_reposts
    }
    playlist_repost_dict = {
        repost["repost_item_id"]: repost
        for repost in playlist_reposts
    }

    tracks = helpers.query_result_to_list(
        filter(None, [row[1] for row in reposts]))
    playlists = helpers.query_result_to_list(
        filter(None, [row[2] for row in reposts]))

    # get track ids
    track_ids = [track["track_id"] for track in tracks]

    # get playlist ids
    playlist_ids = [playlist["playlist_id"] for playlist in playlists]

    # populate full metadata
    tracks = populate_track_metadata(session, track_ids, tracks,
                                     current_user_id)
    playlists = populate_playlist_metadata(
        session,
        playlist_ids,
        playlists,
        [RepostType.playlist, RepostType.album],
        [SaveType.playlist, SaveType.album],
        current_user_id,
    )

    # add activity timestamps
    timestamp_key = response_name_constants.activity_timestamp
    for track in tracks:
        track[timestamp_key] = track_repost_dict[
            track["track_id"]]["created_at"]

    for playlist in playlists:
        playlist[timestamp_key] = playlist_repost_dict[
            playlist["playlist_id"]]["created_at"]

    unsorted_feed = tracks + playlists

    # sort feed by repost timestamp desc
    feed_results = sorted(
        unsorted_feed,
        key=lambda entry: entry[response_name_constants.activity_timestamp],
        reverse=True,
    )

    if args.get("with_users", False):
        user_id_list = get_users_ids(feed_results)
        users = get_users_by_id(session, user_id_list)
        for result in feed_results:
            # Playlists carry "playlist_owner_id", tracks "owner_id";
            # entries with neither key are left without a user.
            if "playlist_owner_id" in result:
                owner_id = result["playlist_owner_id"]
            elif "owner_id" in result:
                owner_id = result["owner_id"]
            else:
                continue
            user = users[owner_id]
            if user:
                result["user"] = user

    return feed_results
Example #18
0
def get_trending_playlists(args, strategy):
    """Returns Trending Playlists. Checks Redis cache for unpopulated playlists.

    Args:
        args: Mapping of options. Keys read here: "current_user_id" (passed
            to the populate helpers), "with_tracks" (when truthy, the tracks
            of each playlist are populated, given users and extended),
            "time" (used for the cache key and the unpopulated-playlist
            getter), and "limit" / "offset" (applied only when both are
            not None).
        strategy: Trending strategy; its `version` is part of the cache key
            and the object is handed to make_get_unpopulated_playlists.

    Returns:
        The extended playlists, in the order of the cached playlist ids,
        each with "track_count" set and its tracks trimmed to
        PLAYLIST_TRACKS_LIMIT.
    """
    db = get_db_read_replica()
    with db.scoped_session() as session:
        current_user_id = args.get("current_user_id", None)
        with_tracks = args.get("with_tracks", False)
        time = args.get("time")
        limit, offset = args.get("limit"), args.get("offset")
        key = make_trending_cache_key(time, strategy.version)

        # Get unpopulated playlists,
        # cached if it exists.
        (playlists, playlist_ids) = use_redis_cache(
            key, None, make_get_unpopulated_playlists(session, time, strategy))

        # Apply limit + offset early to reduce the amount of
        # population work we have to do
        if limit is not None and offset is not None:
            playlists = playlists[offset:limit + offset]
            playlist_ids = playlist_ids[offset:limit + offset]

        # Populate playlist metadata
        playlists = populate_playlist_metadata(
            session, playlist_ids, playlists,
            [RepostType.playlist, RepostType.album],
            [SaveType.playlist, SaveType.album], current_user_id)

        for playlist in playlists:
            # Record the full count before trimming the track list.
            playlist["track_count"] = len(playlist["tracks"])
            playlist["tracks"] = playlist["tracks"][:PLAYLIST_TRACKS_LIMIT]
            # Trim track_ids, which ultimately become added_timestamps
            # and need to match the tracks.
            trimmed_track_ids = {
                track["track_id"]
                for track in playlist["tracks"]
            }
            playlist["playlist_contents"]["track_ids"] = [
                entry
                for entry in playlist["playlist_contents"]["track_ids"]
                if entry["track"] in trimmed_track_ids
            ]

        playlists_map = {
            playlist['playlist_id']: playlist
            for playlist in playlists
        }

        if with_tracks:
            # populate track metadata
            tracks = []
            for playlist in playlists:
                tracks.extend(playlist["tracks"])
            track_ids = [track["track_id"] for track in tracks]
            populated_tracks = populate_track_metadata(session, track_ids,
                                                       tracks, current_user_id)

            # Add users if necessary
            add_users_to_tracks(session, populated_tracks, current_user_id)

            # Re-associate tracks with playlists
            # track_id -> populated_track
            populated_track_map = {
                track["track_id"]: track
                for track in populated_tracks
            }
            for playlist in playlists_map.values():
                populated = [
                    populated_track_map[track["track_id"]]
                    for track in playlist["tracks"]
                ]
                playlist["tracks"] = list(map(extend_track, populated))

        # re-sort playlists to original order, because populate_playlist_metadata
        # unsorts.
        sorted_playlists = [
            playlists_map[playlist_id] for playlist_id in playlist_ids
        ]

        # Add users to playlists
        user_id_list = get_users_ids(sorted_playlists)
        users = get_users_by_id(session, user_id_list, current_user_id)
        for playlist in sorted_playlists:
            user = users[playlist['playlist_owner_id']]
            if user:
                playlist['user'] = user

        # Extend each playlist once and return what extend_playlist produced,
        # in cached order, so the result does not depend on extend_playlist
        # mutating its argument in place.
        extended_by_id = {
            playlist_id: extend_playlist(playlist)
            for playlist_id, playlist in playlists_map.items()
        }
        return [extended_by_id[playlist_id] for playlist_id in playlist_ids]
def get_top_playlists(kind, args):
    """Fetch the top-scoring public playlists or albums.

    Candidates are ranked by `decayed_score` applied to their combined repost
    and save counts; ties are broken by descending playlist id.

    Args:
        kind: "playlist" or "album"
        args: dict of request parameters
        args.limit: max number of results (defaults to 16)
        args.mood: optional mood handed to filter_to_playlist_mood
        args.filter: optional; only "followees" is accepted, and it requires
            a current user
        args.with_users: when truthy, attach each owner under "user"

    Returns:
        The playlist dicts from populate_playlist_metadata, each with a
        "score" key added.

    Raises:
        exceptions.ArgumentError: on an invalid kind or filter, or when the
            followees filter is requested without a current user.
    """
    current_user_id = get_current_user_id(required=False)

    # Validate everything up front, before any database work
    wants_albums = kind == "album"
    if not wants_albums and kind != "playlist":
        raise exceptions.ArgumentError(
            "Invalid kind provided, must be one of 'playlist', 'album'"
        )

    limit = args.get("limit", 16)
    mood = args.get("mood", None)

    query_filter = None
    if "filter" in args:
        query_filter = args.get("filter")
        if query_filter != "followees":
            raise exceptions.ArgumentError(
                "Invalid filter provided, must be one of 'followees'"
            )
        if not current_user_id:
            raise exceptions.ArgumentError(
                "User id required to query for followees"
            )

    db = get_db_read_replica()
    with db.scoped_session() as session:
        # Pick the set of playlists to rank: either only those owned by users
        # the current user follows, or every playlist.
        if query_filter == "followees":
            source = create_followee_playlists_subquery(session, current_user_id)
        else:
            source = session.query(Playlist).subquery()

        # Two extra columns are appended after the playlist columns:
        # the raw repost + save count and its decayed score.
        count_column = (
            AggregatePlaylist.repost_count + AggregatePlaylist.save_count
        ).label("count")
        score_column = decayed_score(
            AggregatePlaylist.repost_count + AggregatePlaylist.save_count,
            source.c.created_at,
        ).label("score")

        scored_query = (
            session.query(source, count_column, score_column)
            .select_from(source)
            .join(
                AggregatePlaylist,
                AggregatePlaylist.playlist_id == source.c.playlist_id,
            )
            .filter(
                source.c.is_current == True,
                source.c.is_delete == False,
                source.c.is_private == False,
                source.c.is_album == (kind == "album"),
            )
        )

        # Mood filtering is delegated; the helper receives the mood as given
        scored_query = filter_to_playlist_mood(session, mood, scored_query, source)

        # Highest score first, then cap the result size
        rows = (
            scored_query.order_by(desc("score"), desc(source.c.playlist_id))
            .limit(limit)
            .all()
        )

        # Split each row into its playlist columns and its score
        score_map = {}  # playlist_id -> score
        playlists = []
        for row in rows:
            # Everything except the trailing count and score columns is the playlist
            playlist = helpers.tuple_to_model_dictionary(row[0:-2], Playlist)
            score_map[playlist["playlist_id"]] = row[-1]
            playlists.append(playlist)

        playlist_ids = [playlist["playlist_id"] for playlist in playlists]

        # Attach peripheral metadata to the playlists
        playlists = populate_playlist_metadata(
            session,
            playlist_ids,
            playlists,
            [RepostType.playlist, RepostType.album],
            [SaveType.playlist, SaveType.album],
            current_user_id,
        )

        # Surface the score on every returned playlist
        for playlist in playlists:
            playlist["score"] = score_map[playlist["playlist_id"]]

        if args.get("with_users", False):
            users = get_users_by_id(session, get_users_ids(playlists))
            for playlist in playlists:
                owner = users[playlist["playlist_owner_id"]]
                if owner:
                    playlist["user"] = owner

    return playlists
Example #20
0
def get_tracks(args):
    """Query current, listed, non-stem tracks with optional filters.

    Args:
        args: dict of request parameters
        args.id: optional list of track ids to restrict to
        args.user_id: optional owner id to restrict to
        args.filter_deleted: when truthy, exclude deleted tracks
        args.min_block_number: optional lower bound on Track.blocknumber
        args.with_users: when truthy, attach each owner under "user"

    Returns:
        List of track dicts from populate_track_metadata.
    """
    tracks = []
    db = get_db_read_replica()
    with db.scoped_session() as session:
        # Baseline: only current, listed tracks that are not stems
        track_query = session.query(Track).filter(
            Track.is_current == True,
            Track.is_unlisted == False,
            Track.stem_of == None,
        )

        # Restrict to an explicit list of track ids
        if "id" in args:
            requested_ids = args.get("id")
            try:
                track_query = track_query.filter(
                    Track.track_id.in_(requested_ids)
                )
            except ValueError as e:
                logger.error("Invalid value found in track id list",
                             exc_info=True)
                raise e

        # Restrict to a single creator
        if "user_id" in args:
            track_query = track_query.filter(
                Track.owner_id == args.get("user_id")
            )

        # Drop deleted tracks on request
        if args.get("filter_deleted"):
            track_query = track_query.filter(Track.is_delete == False)

        if "min_block_number" in args:
            track_query = track_query.filter(
                Track.blocknumber >= args.get("min_block_number")
            )

        # Only these columns may be used for sorting
        sortable_columns = [
            'created_at', 'create_date', 'release_date', 'blocknumber',
            'track_id'
        ]
        track_query = parse_sort_param(track_query, Track, sortable_columns)
        rows = paginate_query(track_query).all()
        tracks = helpers.query_result_to_list(rows)

        track_ids = [track["track_id"] for track in tracks]

        current_user_id = get_current_user_id(required=False)

        # Attach peripheral metadata to the tracks
        tracks = populate_track_metadata(session, track_ids, tracks,
                                         current_user_id)

        if args.get("with_users", False):
            users = get_users_by_id(session, get_users_ids(tracks))
            for track in tracks:
                owner = users[track['owner_id']]
                if owner:
                    track['user'] = owner

    return tracks
Example #21
0
def get_tracks_including_unlisted(args):
    """Fetch tracks by identifier, allowing unlisted tracks.

    An unlisted track is only returned when the route id built from the
    requested url_title and handle equals the track's stored route_id.

    Args:
        args: dict
        args.identifiers: array of { handle, id, url_title} dicts
        args.current_user_id: optional current user ID
        args.filter_deleted: filter deleted tracks
        args.with_users: include users in unlisted tracks

    Returns:
        List of track dicts from populate_track_metadata; an empty list when
        no identifiers are given.
    """
    tracks = []
    identifiers = args["identifiers"]
    for i in identifiers:
        helpers.validate_arguments(i, ["handle", "id", "url_title"])

    # Nothing was requested. Bail out before building the query: `or_()` with
    # no clauses adds no restriction at all, and filter_fn below would then
    # fail looking up rows that were never asked for.
    if not identifiers:
        return tracks

    current_user_id = args.get("current_user_id")
    db = get_db_read_replica()
    with db.scoped_session() as session:

        def get_unpopulated_track():
            base_query = session.query(Track)

            # Create filter conditions as a list of `and` clauses
            filter_cond = [
                and_(Track.is_current == True, Track.track_id == i["id"])
                for i in identifiers
            ]

            # Pass array of `and` clauses into an `or` clause as destructured *args
            base_query = base_query.filter(or_(*filter_cond))

            # Allow filtering of deletes
            # Note: the value is only checked for truthiness, so a raw
            # non-empty string (including 'false') counts as true here.
            if "filter_deleted" in args:
                filter_deleted = args.get("filter_deleted")
                if filter_deleted:
                    base_query = base_query.filter(Track.is_delete == False)

            # Perform the query
            # TODO: pagination is broken with unlisted tracks
            query_results = paginate_query(base_query).all()
            tracks = helpers.query_result_to_list(query_results)

            # Mapping of track_id -> track object from request;
            # used to check route_id when iterating through identifiers
            identifiers_map = {track["id"]: track for track in identifiers}

            # If the track is unlisted and the generated route_id does not match the route_id in db,
            # filter track out from response
            def filter_fn(track):
                input_track = identifiers_map[track["track_id"]]
                route_id = helpers.create_track_route_id(
                    input_track["url_title"], input_track["handle"])

                return not track["is_unlisted"] or track["route_id"] == route_id

            tracks = list(filter(filter_fn, tracks))

            track_ids = [track["track_id"] for track in tracks]
            return (tracks, track_ids)

        # Unpopulated tracks are fetched through the redis cache, keyed on args
        key = make_cache_key(args)
        (tracks,
         track_ids) = use_redis_cache(key,
                                      UNPOPULATED_TRACK_CACHE_DURATION_SEC,
                                      get_unpopulated_track)

        # Add users
        if args.get("with_users", False):
            user_id_list = get_users_ids(tracks)
            users = get_users_by_id(session, user_id_list, current_user_id)
            for track in tracks:
                user = users[track["owner_id"]]
                if user:
                    track["user"] = user
        # Populate metadata
        tracks = populate_track_metadata(session, track_ids, tracks,
                                         current_user_id)

    return tracks
Example #22
0
def get_top_followee_windowed(type, window, args):
    """Fetch the top tracks created within a time window by followed users.

    Tracks are ordered by the count column of the save + repost count
    subquery, then by descending track id.

    Args:
        type: entity type; only 'track' is accepted
        window: one of 'week', 'month', 'year'
        args: dict of request parameters
        args.limit: max number of results (defaults to 25)
        args.with_users: when truthy, attach each owner under 'user'

    Returns:
        List of track dicts from populate_track_metadata.

    Raises:
        exceptions.ArgumentError: on an unsupported type or window.
    """
    if type != "track":
        raise exceptions.ArgumentError(
            "Invalid type provided, must be one of 'track'"
        )

    valid_windows = ["week", "month", "year"]
    # Falsy windows are never members of the list, so one membership test
    # rejects both missing and unknown values
    if window not in valid_windows:
        raise exceptions.ArgumentError(
            "Invalid window provided, must be one of {}".format(valid_windows)
        )

    limit = args.get("limit", 25)

    current_user_id = get_current_user_id()
    db = get_db_read_replica()
    with db.scoped_session() as session:
        # Summed save + repost count per item of the requested `type`
        count_subquery = create_save_repost_count_subquery(session, type)

        # Ids of the users the current user actively follows
        followees = (
            session.query(Follow.followee_user_id)
            .filter(
                Follow.follower_user_id == current_user_id,
                Follow.is_current == True,
                Follow.is_delete == False,
            )
            .subquery()
        )

        # Lower bound on creation time; `window` was checked against
        # valid_windows above before being placed into the SQL text
        window_start = text("NOW() - interval '1 {}'".format(window))

        # Tracks owned by followees, joined with their counts
        top_tracks_query = (
            session.query(Track)
            .join(followees, Track.owner_id == followees.c.followee_user_id)
            .join(count_subquery, Track.track_id == count_subquery.c["id"])
            .filter(
                Track.is_current == True,
                Track.is_delete == False,
                Track.is_unlisted == False,
                Track.stem_of == None,
                Track.created_at >= window_start,
            )
            .order_by(desc(count_subquery.c["count"]), desc(Track.track_id))
            .limit(limit)
        )

        tracks = helpers.query_result_to_list(top_tracks_query.all())
        track_ids = [track["track_id"] for track in tracks]

        # Attach peripheral metadata to the tracks
        tracks = populate_track_metadata(
            session, track_ids, tracks, current_user_id)

        if args.get("with_users", False):
            users = get_users_by_id(session, get_users_ids(tracks))
            for track in tracks:
                owner = users[track["owner_id"]]
                if owner:
                    track["user"] = owner

    return tracks
def search(args):
    """Perform a search. `args` should contain `is_auto_complete`,
    `query`, `kind`, `current_user_id`, and `only_downloadable`;
    `limit` and `offset` are read as well.

    When the `audius_elasticsearch_search_enabled` environment variable is
    set, the Elasticsearch path is tried first; any error there is logged and
    the database search below is used instead.

    A missing, None or empty `query` runs no searches and yields
    `extend_search({})`.

    Raises:
        KeyError: when `kind` is not a member name of SearchKind.
    """

    if os.getenv("audius_elasticsearch_search_enabled"):
        try:
            resp = search_es_full(args)
            return resp
        except Exception as e:
            logger.error(f"Elasticsearch error: {e}")

    # Treat a missing/None query like an empty one so it falls through to the
    # `if search_str` guard below instead of failing on `.replace`
    search_str = args.get("query") or ""

    # when creating query table, we substitute this too
    search_str = search_str.replace("&", "and")

    kind = args.get("kind", "all")
    is_auto_complete = args.get("is_auto_complete")
    current_user_id = args.get("current_user_id")
    only_downloadable = args.get("only_downloadable")
    limit = args.get("limit")
    offset = args.get("offset")

    searchKind = SearchKind[kind]

    results = {}

    # Accumulate user_ids for later
    user_ids = set()

    # Create args for perform_search_query
    search_args = {
        "search_str": search_str,
        "limit": limit,
        "offset": offset,
        "is_auto_complete": is_auto_complete,
        "current_user_id": current_user_id,
        "only_downloadable": only_downloadable,
    }

    if search_str:
        db = get_db_read_replica()
        # Concurrency approach:
        # Spin up a ThreadPoolExecutor for each request to perform_search_query
        # to perform the different search types in parallel.
        # After each future resolves, we then add users for each entity in a single
        # db round trip.
        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
            # Keep a mapping of future -> search_type
            futures_map = {}
            futures = []

            # Helper fn to submit a future and add it to bookkeeping data structures
            def submit_and_add(search_type):
                future = executor.submit(
                    perform_search_query, db, search_type, search_args
                )
                futures.append(future)
                futures_map[future] = search_type

            if searchKind in [SearchKind.all, SearchKind.tracks]:
                submit_and_add("tracks")

            if searchKind in [SearchKind.all, SearchKind.users]:
                submit_and_add("users")

            if searchKind in [SearchKind.all, SearchKind.playlists]:
                submit_and_add("playlists")

            if searchKind in [SearchKind.all, SearchKind.albums]:
                submit_and_add("albums")

            for future in concurrent.futures.as_completed(futures):
                # .result() re-raises any exception from the worker
                search_result = future.result()
                future_type = futures_map[future]

                # Add to the final results
                if future_type == "tracks":
                    results["tracks"] = search_result["all"]
                    results["saved_tracks"] = search_result["saved"]
                elif future_type == "users":
                    results["users"] = search_result["all"]
                    results["followed_users"] = search_result["followed"]
                elif future_type == "playlists":
                    results["playlists"] = search_result["all"]
                    results["saved_playlists"] = search_result["saved"]
                elif future_type == "albums":
                    results["albums"] = search_result["all"]
                    results["saved_albums"] = search_result["saved"]

                # Add to user_ids (collected from the "all" list only)
                user_ids.update(get_users_ids(search_result["all"]))

            with db.scoped_session() as session:
                # Add users back
                users = get_users_by_id(session, list(user_ids), current_user_id)

                for result_list in results.values():
                    for result in result_list:
                        # Playlists/albums carry playlist_owner_id, tracks
                        # carry owner_id; results with neither get no user
                        user_id = None
                        if "playlist_owner_id" in result:
                            user_id = result["playlist_owner_id"]
                        elif "owner_id" in result:
                            user_id = result["owner_id"]

                        if user_id is not None:
                            user = users[user_id]
                            result["user"] = user
    return extend_search(results)
Example #24
0
def get_tracks(args):
    """
    Gets tracks.
    A note on caching strategy:
        - This method is cached at two layers: at the API via the @cache decorator,
        and within this method using the shared get_unpopulated_tracks cache.

        The shared cache only works when fetching via ID, so calls to fetch tracks
        via handle, asc/desc sort, or filtering by block_number won't hit the shared cache.
        These will hit the API cache unless they have a current_user_id included.

    Args:
        args: dict of request parameters
        args.id: optional list of track ids
        args.handle: optional user handle; resolved to an id that is written
            back into args["user_id"] (None when no user matches)
        args.user_id: optional owner id to restrict to
        args.filter_deleted: when truthy, exclude deleted tracks
        args.min_block_number: optional lower bound on Track.blocknumber
        args.sort: "date", "plays", or anything else to defer to parse_sort_param
        args.current_user_id: optional current user ID
        args.with_users: when truthy, attach each owner under 'user'

    Returns:
        List of track dicts from populate_track_metadata.
    """
    tracks = []

    db = get_db_read_replica()
    with db.scoped_session() as session:

        def get_tracks_and_ids():
            if "handle" in args:
                handle = args.get("handle")
                user_row = session.query(User.user_id).filter(
                    User.handle_lc == handle.lower()).first()
                # .first() yields a one-column row (or None when no user
                # matches); unwrap it so the owner filter below compares
                # against a plain id rather than a row object
                args["user_id"] = user_row[0] if user_row is not None else None

            can_use_shared_cache = ("id" in args
                                    and "min_block_number" not in args
                                    and "sort" not in args
                                    and "user_id" not in args)

            if can_use_shared_cache:
                should_filter_deleted = args.get("filter_deleted", False)
                tracks = get_unpopulated_tracks(session, args["id"],
                                                should_filter_deleted)
                track_ids = [track["track_id"] for track in tracks]
                return (tracks, track_ids)

            # Create initial query
            base_query = session.query(Track)
            base_query = base_query.filter(Track.is_current == True,
                                           Track.is_unlisted == False,
                                           Track.stem_of == None)

            # Conditionally process an array of tracks
            if "id" in args:
                track_id_list = args.get("id")
                try:
                    # Update query with track_id list
                    base_query = base_query.filter(
                        Track.track_id.in_(track_id_list))
                except ValueError as e:
                    logger.error("Invalid value found in track id list",
                                 exc_info=True)
                    raise e

            # Allow filtering of tracks by a certain creator
            if "user_id" in args:
                user_id = args.get("user_id")
                base_query = base_query.filter(Track.owner_id == user_id)

            # Allow filtering of deletes
            if "filter_deleted" in args:
                filter_deleted = args.get("filter_deleted")
                if filter_deleted:
                    base_query = base_query.filter(Track.is_delete == False)

            if "min_block_number" in args:
                min_block_number = args.get("min_block_number")
                base_query = base_query.filter(
                    Track.blocknumber >= min_block_number)

            if "sort" in args:
                if args["sort"] == "date":
                    # Newest first, track id as tie-breaker
                    base_query = base_query.order_by(Track.created_at.desc(),
                                                     Track.track_id.desc())
                elif args["sort"] == "plays":
                    # Most played first
                    base_query = base_query.join(
                        AggregatePlays, AggregatePlays.play_item_id ==
                        Track.track_id).order_by(AggregatePlays.count.desc())
                else:
                    whitelist_params = [
                        'created_at', 'create_date', 'release_date',
                        'blocknumber', 'track_id'
                    ]
                    base_query = parse_sort_param(base_query, Track,
                                                  whitelist_params)
            query_results = paginate_query(base_query).all()
            tracks = helpers.query_result_to_list(query_results)

            track_ids = [track["track_id"] for track in tracks]

            return (tracks, track_ids)

        (tracks, track_ids) = get_tracks_and_ids()

        # bundle peripheral info into track results
        current_user_id = args.get("current_user_id")
        tracks = populate_track_metadata(session, track_ids, tracks,
                                         current_user_id)

        if args.get("with_users", False):
            user_id_list = get_users_ids(tracks)
            users = get_users_by_id(session, user_id_list, current_user_id)
            for track in tracks:
                user = users[track['owner_id']]
                if user:
                    track['user'] = user

    return tracks
def get_repost_feed_for_user(user_id, args):
    """Build the feed of tracks and playlists reposted by a user.

    Args:
        user_id: id of the user whose reposts are listed; replaced by the id
            looked up from args.handle when a handle is given
        args: dict of request parameters
        args.handle: optional user handle, matched against User.handle_lc
        args.current_user_id: optional current user ID
        args.with_users: when truthy, attach each owner under 'user'

    Returns:
        List of track and playlist dicts sorted by repost time, newest first.
        Each entry carries the repost's created_at under
        response_name_constants.activity_timestamp.
    """
    feed_results = {}
    db = get_db_read_replica()
    current_user_id = args.get("current_user_id")

    with db.scoped_session() as session:
        if "handle" in args:
            handle = args.get("handle")
            user_row = session.query(
                User.user_id).filter(User.handle_lc == handle.lower()).first()
            # .first() yields a one-column row (or None when no user matches);
            # unwrap it so the repost filter compares against a plain id
            user_id = user_row[0] if user_row is not None else None

        # query all reposts by user
        repost_query = (session.query(Repost).filter(
            Repost.is_current == True, Repost.is_delete == False,
            Repost.user_id == user_id).order_by(desc(Repost.created_at),
                                                desc(Repost.repost_item_id),
                                                desc(Repost.repost_type)))

        reposts = paginate_query(repost_query).all()

        # get track reposts from above
        track_reposts = [
            r for r in reposts if r.repost_type == RepostType.track
        ]

        # get reposted track ids
        repost_track_ids = [r.repost_item_id for r in track_reposts]

        # get playlist reposts from above
        playlist_reposts = [
            r for r in reposts if r.repost_type == RepostType.playlist
            or r.repost_type == RepostType.album
        ]

        # get reposted playlist ids
        repost_playlist_ids = [r.repost_item_id for r in playlist_reposts]

        track_reposts = helpers.query_result_to_list(track_reposts)
        playlist_reposts = helpers.query_result_to_list(playlist_reposts)

        # build track/playlist id --> repost dict from repost lists
        track_repost_dict = {
            repost["repost_item_id"]: repost
            for repost in track_reposts
        }
        playlist_repost_dict = {
            repost["repost_item_id"]: repost
            for repost in playlist_reposts
        }

        # query tracks for repost_track_ids
        track_query = (session.query(Track).filter(
            Track.is_current == True, Track.is_delete == False,
            Track.is_unlisted == False, Track.stem_of == None,
            Track.track_id.in_(repost_track_ids)).order_by(
                desc(Track.created_at)))
        tracks = track_query.all()
        tracks = helpers.query_result_to_list(tracks)

        # get track ids
        track_ids = [track["track_id"] for track in tracks]

        # query playlists for repost_playlist_ids
        playlist_query = (session.query(Playlist).filter(
            Playlist.is_current == True, Playlist.is_delete == False,
            Playlist.is_private == False,
            Playlist.playlist_id.in_(repost_playlist_ids)).order_by(
                desc(Playlist.created_at)))
        playlists = playlist_query.all()
        playlists = helpers.query_result_to_list(playlists)

        # get playlist ids
        playlist_ids = [playlist["playlist_id"] for playlist in playlists]

        # populate full metadata
        tracks = populate_track_metadata(session, track_ids, tracks,
                                         current_user_id)
        playlists = populate_playlist_metadata(
            session, playlist_ids, playlists,
            [RepostType.playlist, RepostType.album],
            [SaveType.playlist, SaveType.album], current_user_id)

        # add activity timestamps (the time of the repost, not of the item)
        for track in tracks:
            track[response_name_constants.
                  activity_timestamp] = track_repost_dict[
                      track["track_id"]]["created_at"]

        for playlist in playlists:
            playlist[response_name_constants.activity_timestamp] = \
                playlist_repost_dict[playlist["playlist_id"]]["created_at"]

        unsorted_feed = tracks + playlists

        # sort feed by repost timestamp desc
        feed_results = sorted(unsorted_feed,
                              key=lambda entry: entry[response_name_constants.
                                                      activity_timestamp],
                              reverse=True)

        if args.get("with_users", False):
            user_id_list = get_users_ids(feed_results)
            users = get_users_by_id(session, user_id_list)
            for result in feed_results:
                # Playlists carry playlist_owner_id, tracks carry owner_id
                if 'playlist_owner_id' in result:
                    user = users[result['playlist_owner_id']]
                    if user:
                        result['user'] = user
                elif 'owner_id' in result:
                    user = users[result['owner_id']]
                    if user:
                        result['user'] = user

    return feed_results
Example #26
0
def get_playlists(args):
    """Query current playlists with optional id and owner filters.

    Private playlists are hidden unless the current user is requesting their
    own playlists via args.user_id. Deleted playlists are hidden unless the
    request is by explicit playlist id.

    Args:
        args: dict of request parameters
        args.playlist_id: optional list of playlist ids to restrict to
        args.user_id: optional owner id to restrict to
        args.with_users: when truthy, attach each owner under 'user'

    Returns:
        List of playlist dicts from populate_playlist_metadata, newest first
        as queried. If sqlalchemy.orm.exc.NoResultFound is raised along the
        way, whatever `playlists` held at that point is returned.

    Raises:
        exceptions.ArgumentError: when filtering by the playlist id list
            raises ValueError.
    """
    playlists = []
    current_user_id = get_current_user_id(required=False)
    hide_private = True

    db = get_db_read_replica()
    with db.scoped_session() as session:
        try:
            query = session.query(Playlist).filter(Playlist.is_current == True)

            # Optional restriction to an explicit list of playlist ids
            fetching_by_id = "playlist_id" in args
            if fetching_by_id:
                requested_ids = args.get("playlist_id")
                try:
                    query = query.filter(Playlist.playlist_id.in_(requested_ids))
                except ValueError as e:
                    raise exceptions.ArgumentError("Invalid value found in playlist id list", e)

            # Optional restriction to a single owner
            if "user_id" in args:
                user_id = args.get("user_id")
                query = query.filter(Playlist.playlist_owner_id == user_id)

                # A user asking for their own playlists may see private ones
                is_own_request = (
                    current_user_id
                    and user_id
                    and int(current_user_id) == int(user_id)
                )
                if is_own_request:
                    hide_private = False

            if hide_private:
                query = query.filter(Playlist.is_private == False)

            # Deleted playlists stay visible only for explicit id lookups
            if not fetching_by_id:
                query = query.filter(Playlist.is_delete == False)

            query = query.order_by(desc(Playlist.created_at))
            playlists = helpers.query_result_to_list(paginate_query(query).all())

            playlist_ids = [playlist["playlist_id"] for playlist in playlists]

            # The current user id is read again here before populating
            current_user_id = get_current_user_id(required=False)

            # Attach peripheral metadata to the playlists
            playlists = populate_playlist_metadata(
                session,
                playlist_ids,
                playlists,
                [RepostType.playlist, RepostType.album],
                [SaveType.playlist, SaveType.album],
                current_user_id
            )

            if args.get("with_users", False):
                users = get_users_by_id(session, get_users_ids(playlists))
                for playlist in playlists:
                    owner = users[playlist['playlist_owner_id']]
                    if owner:
                        playlist['user'] = owner

        except sqlalchemy.orm.exc.NoResultFound:
            pass
    return playlists