예제 #1
0
def get_most_listened_artists(number_of_artists=1,
                              db_file=HISTORY_DATABASE,
                              listens=None,
                              artists=None):
    """Rank artists by how often they were listened to and keep the top ones.

    Parameters:
    number_of_artists   how many of the highest-count rows to keep
    db_file             handed to dbt.get_connectable; only used when
                        listens and/or artists is not supplied
    listens             optional dataframe with at least the columns "id"
                        and "artist_id"; read from the "listens" table
                        when None
    artists             optional dataframe with at least the columns "id"
                        and "artist_name"; read from the "artists" table
                        when None

    Returns a tuple (description, dataframe):
    description     "The <number_of_artists> most listened to artists in
                    the dataset"
    dataframe       the artists' "id" and "artist_name" merged with a
                    per-artist "count" column, reduced to the
                    number_of_artists rows with the largest count.
                    NOTE(review): which key columns show up next to these
                    depends on how pandas merges a left column against a
                    right index level - confirm before relying on it.
    """
    description = "The {} most listened to artists in the dataset".format(
        number_of_artists)

    if listens is None or artists is None:
        # A single connectable serves whichever tables still need loading.
        engine = dbt.get_connectable(db_file)
        if listens is None:
            listens = pd.read_sql_table("listens", con=engine)
        if artists is None:
            artists = pd.read_sql_table("artists", con=engine)

    # Count the non-null listen ids for every artist_id ...
    per_artist = listens[["id", "artist_id"]].groupby("artist_id").count()
    # ... pass each index label through int() and call the counted
    # column "count".
    listen_counts = per_artist.rename(index=int, columns={"id": "count"})

    # Match artists.id against the "artist_id" index level of the counts.
    artist_names = artists[["id", "artist_name"]]
    ranked = pd.merge(artist_names, listen_counts[["count"]],
                      left_on="id", right_on="artist_id")
    top_artists = ranked.nlargest(number_of_artists, "count")

    return (description, top_artists)
예제 #2
0
def read_db():
    """Load the listens, tracks, albums and artists tables from
    HISTORY_DATABASE.

    Returns a tuple with the four tables in the order
    (listens, tracks, albums, artists).
    """
    engine = dbt.get_connectable(HISTORY_DATABASE)
    # The order of the names fixes both the read order and the position of
    # each table in the returned tuple.
    table_names = ("listens", "tracks", "albums", "artists")
    return tuple(pd.read_sql_table(name, con=engine) for name in table_names)
예제 #3
0
def get_num_artists(artists=None, db_file=HISTORY_DATABASE):
    """Count the distinct artist ids in a dataframe or in the database.

    When artists is given, db_file is not used; otherwise the "artists"
    table is read from db_file.

    Returns a tuple (description, number of distinct values in artists.id).
    """
    if artists is None:
        artists = pd.read_sql_table("artists",
                                    con=dbt.get_connectable(db_file))
    return ("Number of unique artists in dataset", artists.id.nunique())
예제 #4
0
def get_num_tracks(tracks=None, db_file=HISTORY_DATABASE):
    """Count the distinct track ids in a dataframe or in the database.

    When tracks is given, db_file is not used; otherwise the "tracks"
    table is read from db_file.

    Returns a tuple (description, number of distinct values in tracks.id).
    """
    if tracks is None:
        tracks = pd.read_sql_table("tracks",
                                   con=dbt.get_connectable(db_file))
    return ("Number of unique tracks in dataset", tracks.id.nunique())
예제 #5
0
def get_num_listens(listens=None, db_file=HISTORY_DATABASE):
    """Count the listens in a dataframe or in the database.

    When listens is given, db_file is not used; otherwise the "listens"
    table is read from db_file.

    Returns a tuple (description, count). The count is the number of
    distinct values in listens.id, so rows sharing an id are counted once.
    """
    if listens is None:
        listens = pd.read_sql_table("listens",
                                    con=dbt.get_connectable(db_file))
    return ("Total number of listens in dataset", listens.id.nunique())