# Standard-library imports needed by the functions below; project-local
# helpers (DbStore, Credentials, util, player_store, parse_file,
# get_credentials, get_tracks, search, config, get_path, ...) are assumed
# to be imported elsewhere in this module.
import datetime
import json
import logging
import os


def add_track_ids(db: DbStore, export_path=EXPORT_PATH):
    data = parse_file(export_path)
    print("Loaded {} plays from GDPR. Removing short plays (<30s)...".format(len(data)))
    data = remove_short_plays(data)
    print("Now got {} plays. Removing tracks already in database...".format(len(data)))
    # FIXME
    # data = remove_recent(data, db.get_first_record())
    print("Got {} plays to insert into database".format(len(data)))
    creds = get_credentials()
    count = len(data)
    failures = []
    plays = []
    cache = {}
    for i, item in enumerate(data):
        main_artist = item["artistName"].split(",")[0]
        cache_key = "{}:{}".format(item["trackName"], item["artistName"])
        print("[{}%] ".format(int(100 * i / count)), end="")
        if cache_key in cache:
            print("Cache hit! ", end="")
            # Shallow-copy so each play keeps its own played_at
            play = dict(cache[cache_key])
        else:
            play = get_track_play(db, creds, item["trackName"], main_artist)
            # get_track_play returns either a flat local-db row or an
            # already-nested Spotify result; only the flat row needs
            # reshaping into the play format (the guard also avoids a
            # crash when nothing was found)
            if play is not None and "track" not in play:
                play = {
                    "track": {
                        "duration_ms": None,
                        "popularity": None,
                        "name": play["track_name"],
                        "id": play["track_id"],
                        "album": {
                            "name": play["album_name"],
                            "id": play["album_id"],
                        },
                        "artists": [
                            {
                                "name": play["main_artist_name"],
                                "id": play["main_artist_id"],
                            }
                        ],
                    }
                }
        if play:
            play["played_at"] = item["time"].isoformat()
            print("got {} by {} with id {}".format(
                item["trackName"], item["artistName"], play["track"]["id"]))
            cache[cache_key] = play
            # Record successful plays so import.json is actually populated
            plays.append(play)
            db.add_play(play)
        else:
            failures.append(item)
            print("FAIL FAIL FAIL")
    print(failures)
    with open("import.json", "w+", encoding="utf-8") as f:
        f.write(json.dumps(plays, default=json_serial))

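# json_serial is used above as the json.dumps default hook but is not
# defined in this file. If it lives elsewhere in the repo this is just a
# sketch of the usual pattern: a minimal fallback serialiser, assuming
# only datetime objects need handling.
def json_serial(obj):
    if isinstance(obj, (datetime.datetime, datetime.date)):
        return obj.isoformat()
    raise TypeError("Type {} not serializable".format(type(obj)))
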
def refresh_events(db: DbStore):
    config = util.config()["gen_events"]
    if not config["enable"]:
        logging.info("Skipping events as gen_events disabled")
        return
    logging.info("Refreshing events")
    last_event = db.latest_event()
    if last_event is not None:
        after = last_event["timestamp"]
        states = player_store.store().player_states_after_time_asc(after)
        logging.info("Processing events after {} ({})".format(after, unix_to_iso(after)))
        initial_state = last_event
    else:
        logging.info("Processing all events (no existing events)")
        states = player_store.store().player_get_states_asc_timestamp()
        initial_state = {"state": {}}
    logging.info("Initial state for event gen: {}".format(initial_state))
    logging.info("Num states to process = {}".format(len(states)))
    new_events = gen_events(initial_state, states)
    logging.info("Generated {} new events".format(len(new_events)))
    if new_events:
        for event in new_events:
            db.add_event(event)
        add_prev_track_id(db)
    logging.info("Deleting old states...")
    player_store.store().delete_states()
    logging.info("Done with gen_events")

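# unix_to_iso is used in the log line above but not defined in this file;
# a plausible one-line sketch, assuming timestamps are unix seconds and
# UTC output is acceptable:
def unix_to_iso(ts):
    return datetime.datetime.fromtimestamp(ts, tz=datetime.timezone.utc).isoformat()
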
def update_albums(db: DbStore, creds: Credentials):
    album_ids = db.incomplete_album_ids()
    full_albums = get_albums(album_ids, creds, UPDATE_SLEEP_MS)
    for album in full_albums:
        db.update_full_album(
            album["id"],
            album["release_date_precision"],
            album["release_date"],
            album["type"],
            album["images"][0]["url"],
            album["images"][1]["url"],
            album["images"][2]["url"],
            album["label"],
            album["popularity"],
            album["genres"],
        )

def import_from_mongo():
    db = DbStore()
    i = 0
    for track in util.get_spotify_db().tracks.find():
        db.add_play_from_mongo(track)
        i += 1
        if i % 100 == 0:
            print("Added {}".format(i))

def import_context_from_mongo():
    db = DbStore()
    for i, track in enumerate(util.get_spotify_db().tracks.find()):
        dt = track["played_at"].isoformat()
        context = track.get("context")
        if context is not None and "uri" in context:
            db.add_context(dt, context["uri"])
        if i % 100 == 0:
            print("Added {}".format(i))
    db.conn.commit()

def insert(tracks, db: DbStore):
    # Get the most recent play stored in the db so we don't insert
    # duplicate items
    latest_track_time = db.most_recent_played_at()
    logging.info(
        "Retrieved tracks from Spotify, filtering out ones played up to {}".format(
            latest_track_time))
    if latest_track_time:
        tracks = remove_tracks_before_inc(tracks, latest_track_time)
    logging.info("Inserting {} tracks".format(len(tracks)))
    for track in tracks:
        logging.info("Adding track {}".format(util.track_to_string(track)))
        db.add_play(track)

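# remove_tracks_before_inc is not defined in this file; a hypothetical
# sketch of the filter it performs, assuming each track carries a
# "played_at" value directly comparable to the cutoff (e.g. ISO-8601
# strings) and that "inc" means the cutoff play itself is also dropped:
def remove_tracks_before_inc(tracks, cutoff):
    return [t for t in tracks if t["played_at"] > cutoff]
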
def add_prev_track_id(db: DbStore):
    events = db.events_with_track_id()
    if not events:
        return  # nothing to do; also avoids an IndexError below
    prev_track_id = events[0]["state"]["track_id"]
    n_updated = 0
    for event in events[1:]:
        track_id = event["state"]["track_id"]
        if track_id != prev_track_id and event.get("prev_track_id") != prev_track_id:
            n_updated += 1
            db.set_prev_track_id(event["state"]["timestamp"], prev_track_id)
        prev_track_id = track_id
    db.commit()
    logging.info("Added prev track info to {} items".format(n_updated))

def update_tracks(db: DbStore, creds: Credentials):
    track_ids = db.incomplete_track_ids()
    full_tracks = get_tracks(track_ids, creds, UPDATE_SLEEP_MS)
    features = get_track_features(track_ids, creds, UPDATE_SLEEP_MS)
    logging.info("Found {} tracks to update".format(len(track_ids)))
    # Both lists are fetched with the same ids, so they line up index-for-index
    for track, feature in zip(full_tracks, features):
        db.update_full_track(
            track["id"],
            track["track_number"],
            track["disc_number"],
            feature["valence"],
            feature["tempo"],
            feature["danceability"],
            feature["energy"],
            feature["instrumentalness"],
            feature["speechiness"],
            feature["time_signature"],
            feature["loudness"],
            feature["liveness"],
            feature["acousticness"],
        )

def run_export():
    if not config()["export"]["enable"]:
        logging.info("Export disabled, not running...")
        return
    logging.info("Export enabled, running...")
    os.chdir(get_path("upload"))
    prev_music = ""
    if os.path.exists("music.csv"):
        with open("music.csv", "r") as f:
            prev_music = f.read()
    db = DbStore()
    write_csv(db)
    with open("music.csv", "r") as f:
        new_music = f.read()
    if new_music != prev_music:
        logging.info("music.csv changed so reuploading to github")
        # I know these should use subprocess; see the sketch after this function
        os.system("rm main.sqlite")
        os.system("cp ../main.sqlite main.sqlite")
        os.system('sqlite3 main.sqlite ".dump" > main.sql')
        os.system("git add main.sql music.csv")
        os.system("git commit -m \"Data upload at {}\"".format(
            datetime.datetime.now().isoformat()))
        os.system("git push -u origin master")
    else:
        logging.info("music.csv unchanged, no new music to upload")

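# A sketch of the same shell sequence via subprocess, as the comment above
# suggests; commands and relative paths are unchanged from run_export, and
# check=True makes a failing step raise instead of being silently ignored.
import subprocess


def git_upload_sketch():
    subprocess.run(["rm", "main.sqlite"], check=True)
    subprocess.run(["cp", "../main.sqlite", "main.sqlite"], check=True)
    with open("main.sql", "w") as f:
        subprocess.run(["sqlite3", "main.sqlite", ".dump"], stdout=f, check=True)
    subprocess.run(["git", "add", "main.sql", "music.csv"], check=True)
    subprocess.run(
        ["git", "commit", "-m",
         "Data upload at {}".format(datetime.datetime.now().isoformat())],
        check=True)
    subprocess.run(["git", "push", "-u", "origin", "master"], check=True)
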
def do_main():
    db = DbStore()
    creds = get_credentials()
    download_and_store_history(db, creds)
    perform_update(db, creds)
    upload()
    logging.info("Done!")

def move_events():
    db = DbStore()
    events = util.get_spotify_db().events
    # count_documents({}) replaces the deprecated cursor.count()
    total = events.count_documents({})
    for i, event in enumerate(events.find()):
        if i % 100 == 0:
            print("Moving events {}%".format(util.percent(i, total)))
        db.add_event(event)
    db.commit()

def fix_tracks(creds: Credentials, db: DbStore):
    track_ids = db.track_ids()
    full_tracks = get_tracks(track_ids, creds, UPDATE_SLEEP_MS)
    logging.info("Downloaded all tracks, updating db...")
    for track in full_tracks:
        for artist in track["artists"]:
            db.add_track_artist(track["id"], artist["id"])
        for artist in track["album"]["artists"]:
            db.add_album_artist(track["album"]["id"], artist["id"])
    db.commit()
    logging.info("Fixed all tracks")

def get_track_play(db: DbStore, creds, track_name, artist_name):
    # Check the local database first for an id
    local_db_play = db.play_from_name_and_artist(track_name, artist_name)
    if local_db_play is not None:
        local_db_play["played_at"] = None
        local_db_play["context"] = None
        print("FROM LOCAL DB {} {}".format(track_name, artist_name))
        return local_db_play
    # No luck; fall back to a Spotify search to get the id
    query = "track:{} artist:{}".format(track_name, artist_name)
    result = search(query, "track", "tracks", creds)
    for track in result:
        if track["name"] == track_name:
            for artist in track["artists"]:
                if artist["name"] == artist_name:
                    print("FROM SPOTIFY {} {}".format(track_name, artist_name))
                    return {
                        "track": track,
                        "played_at": None,  # Filled out later by the caller
                        "context": None,  # No context in the GDPR export
                    }
    return None

def update_artists(db: DbStore, creds: Credentials):
    artist_ids = db.incomplete_artist_ids()
    full_artists = get_artists(artist_ids, creds, UPDATE_SLEEP_MS)
    for artist in full_artists:
        db.update_full_artist(
            artist["id"],
            artist["popularity"],
            artist["followers"]["total"],
            artist["genres"],
        )

def write_basic_track_file(db: DbStore):
    contents = ""
    for track in db.get_basic_tracks():
        contents += "{},{},{}\n".format(track[0], track[1], track[2])
    # Use a context manager so the file handle is actually closed
    with open("tracks.txt", "w+") as f:
        f.write(contents)

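# write_basic_track_file joins fields with bare commas, which corrupts rows
# when a track or artist name itself contains a comma. A sketch of the same
# output via csv.writer, which quotes such fields (same three columns):
import csv


def write_basic_track_file_quoted(db: DbStore):
    with open("tracks.txt", "w", newline="") as f:
        writer = csv.writer(f)
        for track in db.get_basic_tracks():
            writer.writerow([track[0], track[1], track[2]])
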
def write_csv(db: DbStore):
    db.export_plays_as_csv(get_path("upload/music.csv"))

def main():
    db = DbStore()
    # add_track_ids returns None, so there is nothing useful to print here
    add_track_ids(db, "StreamingHistory.json")