def move_events():
    """Copy every document of the Mongo ``events`` collection into a ``DbStore``.

    A progress line is printed for every 100th event (starting with the first).
    """
    store = DbStore()
    event_count = util.get_spotify_db().events.find().count()
    cursor = util.get_spotify_db().events.find()
    for index, event in enumerate(cursor):
        at_progress_step = index % 100 == 0
        if at_progress_step:
            progress = util.percent(index, event_count)
            print("Moving events {}%".format(progress))
        store.add_event(event)
    # NOTE(review): a single commit after the loop is assumed -- confirm.
    store.commit()
# Columns copied from a ``features`` document, in CSV column order.
_FEATURE_COLUMNS = (
    "danceability", "energy", "key", "loudness", "mode", "speechiness",
    "acousticness", "instrumentalness", "liveness", "valence", "tempo",
    "time_signature",
)


def track_csv(out_name):
    """Write every document of the ``tracks`` collection to a CSV file.

    Rows are ordered by ``played_at`` descending. For each play the first
    listed artist and the track's ``features`` document are looked up; when
    either lookup finds nothing, the corresponding columns are left empty
    and the miss is logged instead of aborting the export.

    Args:
        out_name: Path of the CSV file to create (overwritten if present).
    """
    spotify = util.get_spotify_db()
    tracks = spotify.tracks.find({}, sort=[("played_at", pymongo.DESCENDING)])

    header = (
        ["played_at", "track_id", "name", "duration_ms"]
        + list(_FEATURE_COLUMNS)
        + ["track_popularity", "album_id", "artist_popularity", "artist",
           "genres"]
    )
    # Placeholders used when a lookup returns no document.
    empty_feat = dict.fromkeys(_FEATURE_COLUMNS, "")
    empty_artist = {"popularity": "", "name": "", "genres": []}

    with open(out_name, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(header)

        for track in tracks:
            info = track["track"]

            # Only the first listed artist is exported.
            artist_id = info["artists"][0]["id"]
            artist = spotify.artists.find_one({"id": artist_id})
            if artist is None:
                logging.info("NO ARTIST FOR {}".format(artist_id))
                artist = empty_artist

            feat = spotify.features.find_one({"id": info["id"]})
            if feat is None:
                logging.info("NO FEATURE FOR {}".format(info["id"]))
                feat = empty_feat

            row = [
                str(track["played_at"]),
                info["id"],
                info["name"],
                info["duration_ms"],
            ]
            row.extend(feat[column] for column in _FEATURE_COLUMNS)
            row.extend([
                info["popularity"],
                info["album"]["id"],
                artist["popularity"],
                artist["name"],
                ",".join(artist["genres"]),
            ])
            writer.writerow(row)
def print_recent():
    """Print the ten most recently played tracks, newest first.

    Each line shows the play time, the first listed artist and the track
    name. The limit is applied on the cursor so that no more than ten
    documents are requested from the database.
    """
    spotify = util.get_spotify_db()
    recent = spotify.tracks.find(
        {}, sort=[("played_at", pymongo.DESCENDING)]).limit(10)
    for track in recent:
        print("[{}]: {} - {}".format(
            track["played_at"],
            track["track"]["artists"][0]["name"],
            track["track"]["name"]))
def import_from_mongo():
    """Feed every document of the Mongo ``tracks`` collection to a ``DbStore``.

    Prints a running total after each block of 100 added plays.
    """
    store = DbStore()
    plays = util.get_spotify_db().tracks.find()
    for added, play in enumerate(plays, start=1):
        store.add_play_from_mongo(play)
        if added % 100 == 0:
            print("Added {}".format(added))
def import_context_from_mongo():
    """Store the context URI of each play from the Mongo ``tracks`` collection.

    For every track document that has a ``context`` with a ``uri`` key, the
    ISO-formatted ``played_at`` value and that URI are passed to
    ``DbStore.add_context``. A progress line is printed for every 100th
    document visited.
    """
    store = DbStore()
    plays = util.get_spotify_db().tracks.find()
    for index, play in enumerate(plays):
        played_at = play["played_at"].isoformat()
        context = play.get("context")
        has_uri = context is not None and "uri" in context
        if has_uri:
            store.add_context(played_at, context["uri"])
        # NOTE(review): progress print assumed to be per document, not per
        # stored context -- confirm.
        if index % 100 == 0:
            print("Added {}".format(index))
    # NOTE(review): a single commit after the loop is assumed -- confirm.
    store.conn.commit()
def get_data():
    """Return a flat summary of every document in the ``tracks`` collection.

    Returns:
        A list of dicts with the keys ``id``, ``name``, ``artist`` (name of
        the first listed artist) and ``date_played`` (the result of calling
        ``.date()`` on the document's ``played_at`` value).
    """
    spotify = util.get_spotify_db()
    return [
        {
            "id": play["track"]["id"],
            "name": play["track"]["name"],
            "artist": play["track"]["artists"][0]["name"],
            "date_played": play["played_at"].date(),
        }
        for play in spotify.tracks.find()
    ]
def update():
    """Run the feature, artist and album update steps in that order.

    Obtains the database handle and credentials once and passes both to
    ``update_features``, ``update_artists`` and ``update_albums``, logging
    a line before each step.
    """
    spotify = util.get_spotify_db()
    creds = get_credentials()

    logging.info("Updating features...")
    update_features(creds, spotify)

    logging.info("Updating artists...")
    update_artists(creds, spotify)

    logging.info("Updating albums...")
    update_albums(creds, spotify)
def get_tracks(date, timezone=None):
    """Return a cursor over the tracks played on the given calendar day.

    The day is the half-open interval from local midnight up to (but not
    including) the next local midnight, so plays at exactly 00:00:00 and in
    the final second of the day are included.

    Args:
        date: Object exposing ``year``, ``month`` and ``day`` attributes.
        timezone: pytz timezone in which the day is interpreted; defaults
            to GMT when omitted.

    Returns:
        The cursor produced by ``tracks.find`` for that day.
    """
    if timezone is None:
        timezone = pytz.timezone("GMT")

    midnight = datetime.datetime(date.year, date.month, date.day)
    # Localize each boundary separately so a UTC-offset change during the
    # day is reflected in the end boundary.
    start = timezone.localize(midnight)
    end = timezone.localize(midnight + datetime.timedelta(days=1))

    spotify = util.get_spotify_db()
    query = {"played_at": {"$gte": start, "$lt": end}}
    return spotify.tracks.find(query)
def get_unknown_track_ids():
    """Return the track ids that have no document in ``full_tracks`` yet.

    Candidate ids are the union of the ids reported by the player store and
    the distinct ``track.id`` values of the ``tracks`` collection.

    Returns:
        A list of ids (duplicates removed, order unspecified) that are not
        among the distinct ``id`` values of ``full_tracks``.
    """
    spotify = util.get_spotify_db()

    # Ids from player database
    # If never played for > 30 secs then doesn't exist in tracks collection
    player_ids = db.player_store.store().player_distinct_track_ids()
    track_ids = spotify.tracks.find().distinct("track.id")

    # Combine and remove duplicates
    candidate_ids = set(player_ids) | set(track_ids)

    # See what we have in db; a set keeps the difference linear instead of
    # scanning a list once per candidate.
    known_ids = set(spotify.full_tracks.find().distinct("id"))

    return list(candidate_ids - known_ids)
def main():
    """Rewrite every ``tracks`` document with a cleaned ``track`` sub-document.

    Each document's ``track`` value is passed through ``clean_track`` and the
    document is replaced in place under the same ``_id``. Progress is printed
    after every 100 documents.
    """
    spotify = util.get_spotify_db()
    cursor = spotify.tracks.find({})
    total = cursor.count()
    print("Processing {} docs".format(total))

    for done, doc in enumerate(cursor, start=1):
        doc["track"] = clean_track(doc["track"])
        # A single replace keeps the document present at all times; a
        # separate delete + insert would lose it if the run stopped between
        # the two calls.
        spotify.tracks.replace_one({"_id": doc["_id"]}, doc)
        if done % 100 == 0:
            print("[{}%] done".format(util.percent(done, total)))
def update_full_tracks(creds=None):
    """Fetch and insert ``full_tracks`` documents for all unknown track ids.

    Args:
        creds: Credentials handed to ``get_tracks``; obtained through
            ``get_credentials()`` when omitted.

    Returns:
        The number of ids returned by ``get_unknown_track_ids()``.
    """
    if creds is None:
        creds = get_credentials()

    spotify = util.get_spotify_db()
    ids = get_unknown_track_ids()
    pending = len(ids)

    # Nothing to fetch: log and leave early.
    if pending == 0:
        logging.info("[TRACK UPDATE] no tracks to update")
        return pending

    logging.info("[TRACK UPDATE] found {} tracks to update".format(pending))
    fetched = get_tracks(ids, creds)
    spotify.full_tracks.insert_many(fetched)
    logging.info("[TRACK UPDATE] done inserting tracks")
    return pending
def main():
    """Dispatch on the first command-line argument.

    ``print`` adds missing info to the events and prints all of them,
    ``refresh`` calls ``refresh_events``; anything else prints a hint.
    """
    spotify = util.get_spotify_db()

    args = sys.argv[1:]
    if not args:
        print("Provide action")
        return

    action = args[0]
    if action == "refresh":
        refresh_events(spotify)
    elif action == "print":
        add_info_to_events()
        print_events(spotify.events.find())
    else:
        print("Not sure what you mean mate")
def add_info_to_events():
    """Fill in track/artist info on events that reference a track id.

    Two passes are made over the ``events`` collection:

    1. events with ``state.track_id`` but no ``track`` field get ``track``
       and ``artist`` set;
    2. events with ``prev_track_id`` but no ``prev_track`` field get
       ``prev_track`` and ``prev_artist`` set (events whose
       ``prev_track_id`` is None are logged and skipped).

    Lookups go through ``get_track_info`` with one cache dict shared by
    both passes.
    """
    spotify = util.get_spotify_db()
    track_cache = {}

    events_without_track = spotify.events.find(
        {"state.track_id": {"$exists": True}, "track": {"$exists": False}})
    events_without_prev_track = spotify.events.find(
        {"prev_track_id": {"$exists": True}, "prev_track": {"$exists": False}})

    logging.info("Found {} without track, {} without prev track"
                 .format(events_without_track.count(),
                         events_without_prev_track.count()))

    for event in events_without_track:
        t_id = event["state"]["track_id"]
        info = get_track_info(spotify, t_id, track_cache)
        # update_one replaces the deprecated Collection.update; the filter
        # is on _id so exactly one document is targeted either way.
        spotify.events.update_one(
            {"_id": event["_id"]},
            {"$set": {"track": info["track"], "artist": info["artist"]}})

    logging.info("Finished adding info to track events")

    for event in events_without_prev_track:
        t_id = event.get("prev_track_id")
        if t_id is None:
            logging.info(
                "No prev_track_id exists in event {}".format(event["_id"]))
            continue

        info = get_track_info(spotify, t_id, track_cache)
        spotify.events.update_one(
            {"_id": event["_id"]},
            {"$set": {"prev_track": info["track"],
                      "prev_artist": info["artist"]}})

    logging.info("Finished adding info to prev track events")
def load(path):
    """Import plays from a JSON export file into the ``tracks`` collection.

    The file must contain a JSON array of objects. Entries without a
    ``trackId`` key are ignored. The distinct track ids are resolved through
    ``get_tracks`` and one document ``{"played_at": <time>, "track": <full
    track>}`` is inserted per remaining entry.

    Args:
        path: Path of the JSON file to read (decoded as UTF-8).

    Raises:
        KeyError: If an entry lacks ``time`` or its ``trackId`` is not among
            the tracks returned by ``get_tracks``.
    """
    spotify = util.get_spotify_db()

    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    creds = get_credentials()

    plays = [entry for entry in data if "trackId" in entry]
    ids = list({entry["trackId"] for entry in plays})

    full_tracks = get_tracks(ids, creds)
    track_by_id = {f_track["id"]: f_track for f_track in full_tracks}
    print("GOT {} tracks".format(len(full_tracks)))

    states = [
        {"played_at": entry["time"], "track": track_by_id[entry["trackId"]]}
        for entry in plays
    ]

    # insert_many rejects an empty document list, so skip it when the file
    # contained no usable entries.
    if not states:
        return
    spotify.tracks.insert_many(states)
def track_ids():
    """Return the distinct ``track.id`` values of the ``tracks`` collection."""
    tracks = util.get_spotify_db().tracks
    return tracks.distinct("track.id")
def __init__(self):
    """Store the handle returned by ``util.get_spotify_db()`` as ``self.spotify``."""
    handle = util.get_spotify_db()
    self.spotify = handle
def basic():
    """Print every play in the ``tracks`` collection, newest first.

    Each line has the form ``[played_at] first-artist - track-name``.
    """
    spotify = util.get_spotify_db()
    newest_first = [("played_at", pymongo.DESCENDING)]
    for play in spotify.tracks.find({}, sort=newest_first):
        played_at = play["played_at"]
        song = play["track"]
        artist_name = song["artists"][0]["name"]
        title = song["name"]
        print("[{}] {} - {}".format(played_at, artist_name, title))
import util import logging from wordcloud import WordCloud from collections import Counter import datetime import math logging.basicConfig(format='%(asctime)s %(levelname)-8s %(message)s', level=logging.DEBUG, datefmt='%Y-%m-%d %H:%M:%S', filename='output.log') logging.getLogger().addHandler(logging.StreamHandler()) logging.getLogger("requests").setLevel(logging.WARNING) logging.getLogger("urllib3").setLevel(logging.WARNING) db = util.get_spotify_db() def main(): all_genres = [] freq = {} genres = open("genres.txt", "r").read().split(",") for genre in genres: # Get artist data all_genres.append(genre) if genre not in freq: freq[genre] = 1 else: freq[genre] += 1