Example #1
0
def get_similar_artists(artist, limit=10, min_song_ct=5, artist_first=True):
    """Return the names of artists most similar to @artist.

    Candidates are artists whose songs appear in the similarities table
    paired with songs by @artist. They are ranked, highest first, by the
    number of matching similarity rows divided by the candidate's total
    song count. Only candidates with more than @min_song_ct songs are
    considered, and the query returns at most @limit rows.

    When @artist_first is true, @artist is moved (or added) to the front
    of the list, so the result may contain up to @limit + 1 names.
    """
    # Bound parameters are handed to the underlying DBAPI rather than being
    # interpolated into the SQL string, which *should* keep this safe:
    # http://docs.sqlalchemy.org/en/rel_0_9/orm/session_api.html
    #     #sqlalchemy.orm.session.Session.execute
    # http://docs.sqlalchemy.org/en/rel_0_9/core/sqlelement.html
    #     #sqlalchemy.sql.expression.text
    # http://docs.sqlalchemy.org/en/rel_0_9/core/sqlelement.html
    #     #sqlalchemy.sql.expression.bindparam
    bound = dict(artist=artist, count=min_song_ct, limit=limit)

    # Raw SQL instead of the ORM, for performance reasons.
    # NOTE(review): the subquery counts s.id while the joins use song_id;
    # confirm both columns exist on the songs table.
    statement = text('''
SELECT song1.artist, song2.artist, COUNT(sim.similarity) AS sim_count,
    scounts.count AS song_count,
    COUNT(sim.similarity) / CAST(scounts.count AS FLOAT) AS sim_count_norm
    FROM songs AS song1 JOIN similarities AS sim ON song1.song_id=sim.song1_id
    JOIN songs AS song2 ON sim.song2_id=song2.song_id
    JOIN (SELECT s.artist AS artist, COUNT(s.id) AS count
        FROM songs AS s
        GROUP BY s.artist) AS scounts ON song2.artist=scounts.artist
    WHERE song1.artist=:artist AND scounts.count > :count
    GROUP BY song1.artist, song2.artist, scounts.artist, scounts.count
    ORDER BY sim_count_norm DESC
    LIMIT :limit;
    ''')

    # Each row is ('artist1', 'artist2', sim_ct, song_ct, norm_sim_ct);
    # only the second column (the similar artist) is kept.
    similar = []
    for record in DB_SESSION.execute(statement, bound):
        similar.append(record[1])

    if not artist_first:
        return similar

    # Avoid listing the queried artist twice before putting it in front.
    if artist in similar:
        similar.remove(artist)
    return [artist] + similar
Example #2
0
def get_similar_songs(artist, limit=10):
    """Return the songs most similar to the songs by @artist.

    Each entry is a single string of the form 'title artist' (song title,
    a space, then that song's artist). Songs are ranked by their summed
    similarity to the songs by @artist, highest first, and the query
    returns at most @limit rows.
    """
    bound = dict(artist=artist, limit=limit)
    statement = text('''
SELECT song1.artist, song2.title, song2.artist, SUM(sim.similarity) AS total_sim
    FROM songs AS song1 JOIN similarities AS sim ON song1.song_id=sim.song1_id
    JOIN songs AS song2 ON sim.song2_id=song2.song_id
    WHERE song1.artist=:artist
    GROUP BY song1.artist, song2.title, song2.artist
    ORDER BY total_sim DESC
    LIMIT :limit;
    ''')

    # Each row is ('artist1', 'song2', 'artist2', sim); the similar song's
    # title and artist are joined into one display string.
    labels = []
    for record in DB_SESSION.execute(statement, bound):
        title = record[1]
        performer = record[2]
        labels.append('%s %s' % (title, performer))
    return labels