def ArtistSkipProbability(x, asc=1, zeros_ones=0): # This function calculates the probability of skipping each artist and displays in a list of x length. # Also displays the ratio of skips to plays # Set asc=1 to list in ascending order, asc=0 for descending. # Set zeros_ones=1 to include arists with values of 0 or 1. In large datasets there can be an overwheling number of both. # command: aprob <x = length> < asc = boolean for ascending or descending> <zeros_ones = boolean to include probs of 0 or 1> if asc: ordr = "ASC" else: ordr = "DESC" db.query("SELECT SUM(skip_count)/(SUM(skip_count)+SUM(play_count)) AS p, artist, SUM(skip_count) as skips, SUM(play_count) as listens FROM library GROUP BY artist ORDER BY p " + ordr +";") ret = db.rs i = 0 for song in ret: if(song["p"] != None):#and if(zeros_ones or (song["p"] > 0.000000 and song["p"] < 1.0)): i += 1 print("#" + str(i) + " ==============================") print("Artist: " + song["artist"]) print("Probability of skipping: " + str(song["p"])) print("Ratio of skips to plays: " + str(song["skips"]) + ":" + str(song["listens"]) ) print() if(i==x): break
def LibGrowthChart(): print("Gathering data... ") y = [] x = [] i = 0 db.query("SELECT DISTINCT(YEAR(date_added)) yr FROM library ORDER BY yr;") years = [item['yr'] for item in db.rs] for year in years: for mo in range(1,12): # db.query("SELECT COUNT(id) count FROM library WHERE (YEAR(date_added) = " + str(year) + " AND MONTH(date_added) < " + str(mo) + ") OR YEAR(date_added) < " + str(year) + ";") # total = db.rs[0]['count'] db.query("SELECT COUNT(id) count FROM library WHERE YEAR(date_added) = " + str(year) + " AND MONTH(date_added) = " + str(mo) + ";") count = db.rs[0]['count'] # change = count - total x.append(year + mo/12) y.append(count) print("Be sure to close chart window before continuing.") plt.plot(x,y) plt.gcf().autofmt_xdate() plt.show()
def GenreTreeMap(): print("Be sure to close chart window before continuing") db.query("SELECT COUNT(id) as count, SUM(play_count) as pc, genre FROM library GROUP BY genre ORDER BY count DESC LIMIT 20;") sizs = [] labels = [] plays = [] for genre in db.rs: sizs.append(genre['count']) labels.append(genre['genre']) plays.append(int(genre['pc'])) # print(plays) # create a color palette, mapped to these values cmap = matplotlib.cm.Reds mini=min(plays) maxi=max(plays) norm = matplotlib.colors.Normalize(vmin=mini, vmax=maxi) colors = [cmap(norm(value)) for value in plays] # Change color squarify.plot(sizes=sizs,label=labels, alpha=.8)#, color=colors ) plt.axis('off') plt.show()
def TopXArtistsBySongs(leng=10): db.query("SELECT COUNT(id) AS cnt, artist FROM library GROUP BY artist ORDER BY cnt DESC LIMIT " + str(leng) + ";") ret = db.rs i = 0 for song in ret: i += 1 print("#" + str(i) + " ==============================") print("Artist: " + song["artist"]) print("Number Of Songs: " + str(song["cnt"])) print()
def PlayListUnheard(db_name, x=50, genre="recent", since=1900): # generates a playlist of songs that have not been heard before. # pass x for list len, genre to specify genre(x most recent are used as default), and min year released for playlist. # prints artist and song db2 = DBConnector() if(db2.execute("USE "+ db_name + ";") == -1): return -1 i = 0 if(genre.lower() == "rap"): genre += " hip hop " if(genre == "recent"): q = "SELECT genre FROM library ORDER BY play_date DESC LIMIT " + str(x) + ";" db.query(q) for row in db.rs: gen = row["genre"] q2 = "SELECT SUBSTRING(name,1,50) as name, SUBSTRING(artist,1,50) as artist FROM library WHERE genre LIKE '" + gen + "' AND play_count = 0 AND YEAR(rel_date) >= " + str(since) + " ORDER BY RAND() LIMIT 1;" db2.query(q2) for row2 in db2.rs: i += 1 name = row2["name"] artist = row2["artist"] print("#" + str(i) + " ===========================") print("\tSong: " + name) print("\tArtist: " + artist) else: words = re.findall(r"[\w']+", genre) genrestr = "" for word in words: genrestr += " genre LIKE '%" + word + "%' OR" genrestr = genrestr[:-3] q2 = "SELECT SUBSTRING(name,1,50) as name, SUBSTRING(artist,1,50) as artist, genre FROM library WHERE " + genrestr + " AND play_count = 0 AND YEAR(rel_date) >= " + str(since) + " ORDER BY RAND() LIMIT " + str(x) + ";" db2.query(q2) for row2 in db2.rs: i += 1 name = row2["name"] artist = row2["artist"] genre = row2["genre"] print("#" + str(i) + " ===========================") print("\tSong: " + name) print("\tArtist: " + artist) print("\tGenre:" + genre) db2.disconnect()
def TopXGenresBySongs(x=10): #top x genres by the number of songs in that genre # command: gbs <x - optional list len> db.query("SELECT COUNT(id) AS cnt, genre FROM library GROUP BY genre ORDER BY cnt DESC LIMIT " + str(x) + ";") ret = db.rs i = 0 for song in ret: i += 1 print("#" + str(i) + " ==============================") print("Genre: " + song["genre"]) print("Number Of Songs: " + str(song["cnt"])) print()
def NumSongsByYear(x=10): # prints top x years based on number of songs # command sby <x = length> db.query("SELECT COUNT(id) AS sm, year FROM library GROUP BY year ORDER BY sm DESC LIMIT " + str(x) + ";") ret = db.rs i = 0 for song in ret: i += 1 print("#" + str(i) + " ==============================") print("Year: " + str(song["year"])) print("Number of Songs: " + str(song["sm"])) print()
def listenChart(): y = [] x = [] print("Gathering data...") global db db.query("SELECT COUNT(record_id) AS count, DATE(listen_date) AS date FROM listening_history GROUP BY date ORDER BY date;") for date in db.rs: x.append(date['date']) y.append(date['count']) print("Be sure to close chart window before continuing.") plt.plot(x,y) plt.gcf().autofmt_xdate() plt.show()
def TopXSongsByPlays(x=10): # top x songs by play count # command sbp <optional x list len> if(x<1): return db.query("SELECT * FROM library ORDER BY play_count DESC LIMIT " + str(x) + ";") ret = db.rs i = 0 for song in ret: i += 1 print("#" + str(i) + " ==============================") print("Title: " + song["name"]) print("Artist: " + song["artist"]) print("Play Count: " + str(song['play_count'])) print()
def MonthsBySongsAdded(): # produces a chart of total songs added for each month of the year # command: monchart db.query("SELECT count(id) AS cnt, MONTH(date_added) AS month FROM library GROUP BY MONTH(date_added) ORDER BY month ASC ;") ret = db.rs x = [] y = [] i = 0 for song in ret: x.append(song['month']) y.append(song['cnt']) fig, ax = plt.subplots() plt.bar(x, y) plt.xticks(x, ('Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec',)) plt.show()
def _load_group_rights(self): from globals import db rights = Rights(self.unit_rights) groups = [] query = db.query('groups') query.join('LEFT JOIN unit_groups USING (group_id)') query.where('unit_groups.unit_id = %s', self.unit_id) for group, grights in query.fetchcols(('group_name', 'rights')): groups.append(group) rights.add(grights) self._groups = groups self._rights = rights
def CoronaGrowthChart(): print("Gathering data... ") y = [] x = [] i = 0 # db.query("SELECT COUNT(id) count FROM library WHERE (YEAR(date_added) = " + str(year) + " AND MONTH(date_added) < " + str(mo) + ") OR YEAR(date_added) < " + str(year) + ";") # total = db.rs[0]['count'] db.query("SELECT count(id) count, DATE(date_added) date FROM library WHERE YEAR(date_added) = 2020 group by date ORder by date asc;") for item in db.rs: x.append(item['date']) y.append(item['count']) # change = count - total # x.append(2020+ mo/12) # y.append(count) print("Be sure to close chart window before continuing.") plt.plot(x,y) plt.gcf().autofmt_xdate() plt.show()
def GenreStreamGraphPlays(): genres = [] years = [] counts = [] db.query("SELECT COUNT(id) count, genre FROM library GROUP BY genre ORDER BY count DESC LIMIT 27;") for genre in db.rs: genres.append(genre['genre']) data = [] db.query("SELECT DISTINCT YEAR(listen_date) year FROM listening_history ORDER BY year;") years = [item['year'] for item in db.rs] print("Gathering data... This might take a bit") for year in years: for month in range(1,13): for genre in genres: db.query("SELECT count(record_id) count FROM listening_history LEFT JOIN library ON track_id = id WHERE genre = '" + genre + "' AND MONTH(listen_date) = " + str(month) + " AND YEAR(listen_date) = " + str(year)+ ";") for item in db.rs: if(month<10): s_month = "0" + str(month) else: s_month = str(month) date = str(year) + "-" + s_month# + "-01T01:00:00.000Z" # print(date) temp = {"date": date, "genre":genre, "count": item['count']} data.append(temp) # , scale=alt.Scale(domain=(0,1000)) df = pd.DataFrame(data=data) streamgraph= alt.Chart(df,width=1250, height=750).mark_area(interpolate="basis").encode( alt.X('date:T', axis=alt.Axis( domain=False, tickSize=0) ), alt.Y('count:Q', stack='center', axis=alt.Axis(labels=False, domain=False, tickSize=0)), alt.Color('genre:N', scale=alt.Scale(scheme='tableau20'), ),tooltip=['genre','count'] ).interactive().configure(background='#DDEEFF') streamgraph.save("list_stream.html") webbrowser.open('file://' + os.path.realpath("list_stream.html"))
def GenreGrowthChart(): genres = [] years = [] counts = [] db.query("SELECT COUNT(id) count, genre FROM library GROUP BY genre ORDER BY count DESC LIMIT 10;") for genre in db.rs: genres.append(genre['genre']) data = [] db.query("SELECT DISTINCT YEAR(date_added) year FROM library ORDER BY year;") years = [item['year'] for item in db.rs] # years = [2020] for year in years: for month in range(1,12): if year == 2020 and month > 4: break; for genre in genres: db.query("SELECT count(id) count FROM library WHERE genre = '" + genre + "' AND MONTH(date_added) = " + str(month) + " AND YEAR(date_added) = " + str(year)+ ";") for item in db.rs: if(month<10): s_month = "0" + str(month) else: s_month = str(month) date = str(year)+ "-" + s_month + "T00:00:00.000Z" data.append({"date": date, "genre":genre, "count": item['count']}) df = pd.DataFrame(data=data) print(df) lines = alt.Chart(df,width=1250, height=750).mark_line().encode( x='date', y='count', color='genre', # strokeDash='genre', tooltip=['genre','count'] ).interactive() lines.save("genre_grow.html") webbrowser.open('file://' + os.path.realpath("genre_grow.html"))
def load(self, *ids): from globals import db query = db.query(self.entity_name) query.where_in(self.id_col, ids) for row in query.fetchall(): self.add(row)
def LibToDB(self): #load the library info into the database, updating if it already exists there bar = progressbar.ProgressBar(maxval=len(self.library), widgets=[ progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage() ]) bar.start() i = 0 for track in self.library: i += 1 bar.update(i) stmt = "SELECT id, play_count, skip_count " # first check if the song exists in stmt += " FROM library " # the library already. stmt += "WHERE name = '" + track.name.replace("'", "''") stmt += "' AND artist = '" + track.artist.replace("'", "''") stmt += "' AND album = '" + track.album.replace("'", "''") stmt += "' AND track_num = " + str(track.track_num) + ";" db.query(stmt) if (db.rs == []): # if the record does not exist, stmt = " INSERT INTO library " # insert it into the library table stmt += " (name, artist, album, album_artist, " stmt += " comp, genre, kind, total_time, track_num," stmt += " track_count, year, date_added, play_count, " stmt += " play_date, rel_date, skip_count, loved)" stmt += " VALUES ('" + track.name.replace( "'", "''") + "', '" + track.artist.replace( "'", "''") + "', '" + track.album.replace( "'", "''") + "', '" + track.alb_artist.replace( "'", "''") + "', '" stmt += track.comp.replace( "'", "''") + "', '" + track.genre.replace( "'", "''") + "', '" + track.kind.replace( "'", "''") + "', " + str(track.total_time) + ", " stmt += str(track.track_num) + ", " + str( track.track_count) + ", " + str( track.year) + ", DATE_ADD('" + str( track.date_add) + "', INTERVAL -4 HOUR), " stmt += str(track.play_count) + ", DATE_ADD('" + str( track.play_date ) + "', INTERVAL -4 HOUR), DATE_ADD('" + str( track.rel_date) + "', INTERVAL -4 HOUR), " + str( track.skip_count) + "," + str(track.loved) + ");" db.execute(stmt) last_id = db.cursor.lastrowid if ( track.play_count > 0 ): # if there are plays, insert last play into listening history if (int(track.play_date[:4]) > 2000): stmt = "INSERT INTO listening_history (track_id, listen_date, listen_count)" stmt += " VALUES ( " + str(last_id) + "," stmt += " DATE_ADD('" + str( track.play_date) + "', INTERVAL -4 HOUR)," stmt += " 1);" db.execute(stmt) else: # if the record does exist track_id = db.rs[0]['id'] # track id db_pc = db.rs[0]['play_count'] # current listen count if (db_pc == None): db_pc = 0 db_sc = db.rs[0]['skip_count'] # current skip count if (db_sc == None): db_sc = 0 if (track.play_count > db_pc or track.skip_count > db_sc): # if there are new plays or skips if (track.play_count > db_pc): # for new plays only: #q = "SELECT record_id FROM listening_history WHERE track_id = " + str(track_id) + " AND listen_date = '" + str(track.play_date) + "';" #db.query(q) #if(db.rs == []): # new listen record in listening history stmt = "INSERT INTO listening_history (track_id, listen_date, listen_count)" stmt += " VALUES ( " + str(track_id) + "," stmt += " DATE_ADD('" + str( track.play_date) + "', INTERVAL -4 HOUR)," stmt += " " + str(track.play_count) + ");" db.execute(stmt) # for new plays and new skips: stmt = "UPDATE library " # update data in library stmt += " SET play_count = " + str( track.play_count) + ", " stmt += " total_time = " + str( track.total_time) + ", " stmt += " play_date = DATE_ADD('" + str( track.play_date) + "', INTERVAL -4 HOUR), " stmt += " skip_count = " + str( track.skip_count) + ", " stmt += " loved = " + str( track.loved) + " " stmt += "WHERE id = " + str(track_id) + ";" db.execute(stmt) bar.finish()
def StripPlot(year=None): print("Generating strip plot ...") #year - oldest year added for songs you want to look at, takes all songs if null db.query("SELECT count(record_id) count, genre FROM listening_history LEFT JOIN library on track_id = id GROUP BY genre ORDER BY count DESC LIMIT 10;") genres = [] for item in db.rs: genres.append(item['genre']) songs = [] max_play = 0 for genre in genres: q = "SELECT name, artist, loved, genre, play_count, date_added FROM library WHERE genre = '" + genre + "'" if year != None: q += " AND YEAR(date_added) > " + str(year) + ";" else: q += ";" db.query(q) for item in db.rs: if item['loved'] == 0: loved = False else: loved = True temp = {"name": item['name'],"artist":item['artist'], "genre": item['genre'].lower(), "play_count": item['play_count'],"loved":loved,"sk":item['date_added']} if(int(item['play_count']) > max_play): max_play = int(item['play_count']) songs.append(temp) if(max_play < 50): max_play = 50 df = pd.DataFrame(data=songs) stripplot = alt.Chart(df, width=100, height=500).mark_point(size=30,filled=True).encode( x=alt.X( 'jitter:Q', title=None, axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False), scale=alt.Scale(), ), y=alt.Y('play_count:Q', scale=alt.Scale(domain=(0,max_play))), color=alt.Color('loved:N',legend=None),tooltip=['name','artist','play_count'], # size ='count:Q', shape = alt.Shape( "loved:N", scale = alt.Scale(range=["circle", "triangle"],zero=True)), #filled='true:B', column=alt.Column( 'genre:N', header=alt.Header( labelAngle=-90, titleOrient='top', labelOrient='bottom', labelAlign='right', labelPadding=3, ), spacing=10, ), ).transform_calculate( # Generate Gaussian jitter with a Box-Muller transform jitter='sqrt(-2*log(random()))*cos(2*PI*random())' ).configure_facet( spacing=0 ).interactive().configure_view( stroke=None ) stripplot.save("StripPlot.html") webbrowser.open('file://' + os.path.realpath("StripPlot.html"))