def get_most_listened_artists(number_of_artists=1, db_file=HISTORY_DATABASE,
                              listens=None, artists=None):
    """Returns the artists with the most listens as (description, dataframe).

    listens and artists may be passed in as dataframes. Whichever of the two
    is None is read from the table of the same name in db_file; if both are
    given, db_file is ignored and no connection is made.

    The "id" and "artist_id" columns of listens are used to count listens per
    artist, and the "id" and "artist_name" columns of artists are merged with
    those counts. The returned dataframe holds the number_of_artists rows
    with the largest "count", as selected by DataFrame.nlargest.

    NOTE(review): the exact column layout of the merged dataframe depends on
    how pandas treats the "artist_id" index level used as the right-hand
    merge key -- confirm before relying on specific column names.
    """
    description = "The {} most listened to artists in the dataset".format(
        number_of_artists)

    # Only open the database when at least one table has to be loaded.
    if listens is None or artists is None:
        connectable = dbt.get_connectable(db_file)
        if listens is None:
            listens = pd.read_sql_table("listens", con=connectable)
        if artists is None:
            artists = pd.read_sql_table("artists", con=connectable)

    # One row per artist_id; the counted "id" column becomes "count" and the
    # index labels are passed through int().
    listens_per_artist = listens[["id", "artist_id"]].groupby("artist_id").count()
    listens_per_artist = listens_per_artist.rename(
        index=int, columns={"id": "count"})

    # Attach artist names to the counts, then keep only the top rows.
    named_counts = pd.merge(
        artists[["id", "artist_name"]],
        listens_per_artist[["count"]],
        left_on="id",
        right_on="artist_id",
    )
    return (description, named_counts.nlargest(number_of_artists, "count"))
def read_db():
    """Reads the listens, tracks, albums and artists tables from
    HISTORY_DATABASE.

    Returns a tuple with the result of each table read, in the order
    (listens, tracks, albums, artists).
    """
    connectable = dbt.get_connectable(HISTORY_DATABASE)
    # Order matters: callers unpack the tuple positionally.
    table_names = ("listens", "tracks", "albums", "artists")
    return tuple(
        pd.read_sql_table(table, con=connectable) for table in table_names
    )
def get_num_artists(artists=None, db_file=HISTORY_DATABASE):
    """Returns (description, count) where count is the number of unique
    values in the id column of artists.

    If artists is None, the "artists" table is read from db_file; otherwise
    db_file is ignored.
    """
    if artists is None:
        artists = pd.read_sql_table(
            "artists", con=dbt.get_connectable(db_file))
    return ("Number of unique artists in dataset", artists.id.nunique())
def get_num_tracks(tracks=None, db_file=HISTORY_DATABASE):
    """Returns (description, count) where count is the number of unique
    values in the id column of tracks.

    If tracks is None, the "tracks" table is read from db_file; otherwise
    db_file is ignored.
    """
    if tracks is None:
        tracks = pd.read_sql_table(
            "tracks", con=dbt.get_connectable(db_file))
    return ("Number of unique tracks in dataset", tracks.id.nunique())
def get_num_listens(listens=None, db_file=HISTORY_DATABASE):
    """Returns (description, count) where count is the number of unique
    values in the id column of listens.

    If listens is None, the "listens" table is read from db_file; otherwise
    db_file is ignored.

    NOTE(review): the description reads "Total number of listens" while the
    value is a count of unique ids; the two agree only if ids are unique --
    confirm against the table schema.
    """
    if listens is None:
        listens = pd.read_sql_table(
            "listens", con=dbt.get_connectable(db_file))
    return ("Total number of listens in dataset", listens.id.nunique())