예제 #1
0
def stats_by_artist_overall(db_name):
    os_stuff.clear()
    db = db_tools.sql_connection(db_name)
    print('opening data...')
    artist_repeat = 1
    while artist_repeat == 1:
        selected_artist = input("Enter a SOLO ARTIST or a BAND: ")
        # substr(song, 0, 25) as "Song",
        db_response = pd.read_sql_query(f'''
            SELECT id, 
                substr(performer, 0, 25) as "Performer",
                avg(week_position) as "Average Position", 
                min(peak_position) as "Best Position", 
                sum(weeks_on_chart) as "Total Wks on Chrt", 
                min(weekid) as "First Appearance",
                max(weekid) as "Last Appearance"
            FROM music 
            WHERE performer 
            LIKE "%{selected_artist}%" 
            GROUP BY performer
            ''', db)
        os_stuff.clear()
        if len(db_response) != 0:
            artist_repeat = 0
        else:
            print('No results found')
            pause_me = input('Press ENTER to Continue')
            pass
    pd.set_option('display.max_rows', None)
    artist_data = pd.DataFrame(db_response)
    artist_data = artist_data.set_index('id')

    print(artist_data)
    pause_me = input('Press ENTER to continue.')
    db.close()
def read_print_csv(data_file):
    os_stuff.clear()
    print('TASK 1 - READ A CSV FILE INTO MEMORY')
    print('-------------------------------------')
    print(' ')
    print(f'The file {data_file} will be read from the current directory and each line added to a list, '
          f'which will, then, be returned')
    print(' ')
    pause_me = input('Press ENTER key to begin.')
    print(' ')
    print('Converting date formats in the CSV to YEAR-MONTH-DAY. This will take a moment.')
    print('Reading CSV')
    my_data_list = pd.read_csv(data_file)
    print('Converting Data to DateFrame')
    music_df = pd.DataFrame(my_data_list)
    print('Converting Date format')
    music_df['WeekID'] = pd.to_datetime(music_df['WeekID'])
    music_df['WeekID'] = music_df['WeekID'].dt.strftime('%Y-%m-%d')
    print(' ')
    print('The data has been read from CSV & loaded into the list')
    print(f'There are {len(my_data_list)} records.')
    pause_me = input('Press ENTER to verify this? ')
    os_stuff.clear()
    print(music_df.head())
    pause_me = input('Press ENTER key to continue to the next step.')
    return music_df
예제 #3
0
def load_data_into_db(db_name, music_df):
    os_stuff.clear()
    print(
        "Now, we'll load that data into the a Pandas DataFrame and then send it into a SQLite DB."
    )
    print(" ")
    pause_me = input("Press ENTER key to continue.")

    def insert_data(db, music_df):
        # music_df = pd.DataFrame(data_list)
        music_df.columns = music_df.columns\
            .str.strip()\
            .str.lower()\
            .str.replace(' ', '_')\
            .str.replace('(', '')\
            .str.replace(')', '')
        music_df.to_sql('music', con=db, if_exists='replace', index_label='id')

    db = db_tools.sql_connection(db_name)
    print('Putting the data into the base')
    insert_data(db, music_df)
    db.close()
    print('')
    print("Whew... So, that's done.")
    pause_me = input('Press ENTER key to continue.')
예제 #4
0
def intro(db_name):
    os_stuff.clear()
    print("TASK 3 - QUERYING THE DB")
    print('------------------------')
    print(' ')
    print(f"In this step, you'll walk through pulling information from the DB file ({db_name})"
          f" in a structured manner.")
    print(' ')
    pause_me = input('Press ENTER key to begin.')
예제 #5
0
def query_by_artist(db_name):
    os_stuff.clear()
    db = db_tools.sql_connection(db_name)
    print('opening data...')
    cur = db.cursor()
    selected_artist = input("Enter a SOLO ARTIST or a BAND: ")
    ex_str = f'SELECT * FROM music WHERE performer LIKE "%{selected_artist}%" AND "week_position" <= 10 GROUP BY songid ORDER BY weekid ASC'
    cur.execute(ex_str)
    db_response = cur.fetchall()
    print_db_results(db_response)
    db.close()
예제 #6
0
def query_menu(db_name):
    repeat = 1
    while repeat == 1:
        os_stuff.clear()
        print('******************** PICK A QUERY ********************')
        print('BASIC SQL QUERIES')
        print('01 - Songs in Top 10 for a given year')
        print('02 - Songs in Top 10 for a given year and month')
        print('03 - Songs in Top 10 for a given Artist')
        print('')
        print('MatPlotLib/Pandas QUERIES')
        print('11 - Plots Song Popularity over time')
        print('12 - Plots Artist Popularity over time')
        print('13 - Stats for Songs per Artist')
        print('14 - Stats for Artist')
        print('')
        print('99 - EXIT')
        print('')
        selection = input("Query number: ")
        if selection.isdigit():
            if int(selection) == 1:
                top_10_by_year(db_name)
                repeat = 1
            elif int(selection) == 2:
                top_10_by_yr_and_mo(db_name)
                repeat = 1
            elif int(selection) == 3:
                query_by_artist(db_name)
                repeat = 1
            elif int(selection) == 11:
                plot_song_pop_over_time(db_name)
                repeat = 1
            elif int(selection) == 12:
                plot_artist_pop_over_time(db_name)
                repeat = 1
            elif int(selection) == 13:
                stats_by_artist(db_name)
                repeat = 1
            elif int(selection) == 14:
                stats_by_artist_overall(db_name)
                repeat = 1
            elif int(selection) == 99:
                break
            else:
                pass
        else:
            pass
    print('GOOD BYE')
예제 #7
0
def plot_song_pop_over_time(db_name):
    os_stuff.clear()
    db = db_tools.sql_connection(db_name)
    print('opening data...')
    cur = db.cursor()
    artist_repeat = 1
    while artist_repeat == 1:
        selected_artist = input("Enter a SOLO ARTIST or a BAND: ")
        ex_str = f'SELECT * FROM music WHERE performer LIKE "%{selected_artist}%"  GROUP BY songid ORDER BY weekid ASC'
        cur.execute(ex_str)
        os_stuff.clear()
        db_response = cur.fetchall()
        if len(db_response) != 0:
            artist_repeat = 0
        else:
            print('No results found')
            pause_me = input('Press ENTER to Continue')
            pass
    songs = []
    menu_counter = 1
    os_stuff.clear()
    for line in db_response:
        print(f'{menu_counter}. {line[4]} by {line[5]} ({line[2][:4]})')
        songs.append(line[4])
        menu_counter += 1
    repeat = 1
    while repeat == 1:
        song_selection = input("Select a song by the number: ")
        if song_selection.isdigit():
            if 0 < int(song_selection) < len(songs)+1:
                repeat = 0
            else:
                pass
        else:
            pass
    song_index = int(song_selection) - 1
    print(f"You chose {songs[song_index]}")
    ex_str = f'SELECT * FROM music WHERE performer LIKE "%{selected_artist}%" AND song LIKE "{songs[song_index]}" ORDER BY weekid ASC'
    cur.execute(ex_str)
    os_stuff.clear()
    db_response = cur.fetchall()
    dates = []
    chart_positions = []
    for line in db_response:
        #line_date = line[2]
        #dates.append(datetime.strptime(line_date, '%m/%d/%Y'))
        dates.append(line[2])
        chart_positions.append(line[3])
    dates, chart_positions = zip(*sorted(zip(dates, chart_positions)))
    pd.plotting.register_matplotlib_converters()
    plt.plot_date(dates, chart_positions, '-')
    plt.gca().invert_yaxis()
    plt.show()
    db.close()
예제 #8
0
def plot_artist_pop_over_time(db_name):
    os_stuff.clear()
    db = db_tools.sql_connection(db_name)
    print('opening data...')
    cur = db.cursor()
    artist_repeat = 1
    while artist_repeat == 1:
        selected_artist = input("Enter a SOLO ARTIST or a BAND: ")
        search_string1 = f'''
            SELECT 
                Performer,  
                week_position as "Position", 
                song as "Song", 
                songid as "ID",
                min(weekid) as "Date",
                count(*) as "Count"
            FROM music 
            WHERE Performer
            LIKE "{selected_artist}" 
            AND
            week_position <= 10
            GROUP BY songid
        '''
        os_stuff.clear()
        cur.execute(search_string1)
        db_response1 = cur.fetchall()
        if len(db_response1) != 0:
            artist_repeat = 0
        else:
            print('No results found')
            pause_me = input('Press ENTER to Continue')
            pass
    dates = []
    songs = []
    counts = []
    db_df = pd.DataFrame(db_response1)
    pd.set_option('display.expand_frame_repr', False)
    os_stuff.clear()
    print(db_df)
    pause_me = input("ENTER")
    for line in db_response1:
        #line_date = line[4][:10]
        #dates.append(datetime.strptime(line_date, '%m/%d/%Y').date())
        dates.append(line[4])
        songs.append(line[2])
        counts.append(line[5])
    dates, songs, counts = zip(*sorted(zip(dates, songs, counts)))
    pd.plotting.register_matplotlib_converters()
    plt.plot_date(dates, counts, '-')
    plt.gca()
    plt.show()
    db.close()
    os_stuff.clear()
예제 #9
0
def top_10_by_yr_and_mo(db_name):
    os_stuff.clear()
    db = db_tools.sql_connection(db_name)
    print('opening data...')
    cur = db.cursor()
    cur.execute('SELECT min(WeekID) FROM music')
    min_year = int(cur.fetchone()[0][:4])
    cur.execute('SELECT max(WeekID) FROM music')
    max_year = int(cur.fetchone()[0][:4])
    os_stuff.clear()
    print("DB contains data from " + str(min_year) + " to " + str(max_year) + ".")
    repeat = 1
    while repeat == 1:
        selected_year = input("Enter a year: ")
        if not selected_year.isdigit():
            print("Invalid year!")
            pause_me = input("Press any key to continue")
        elif int(selected_year) < min_year or int(selected_year) > max_year:
            print("Invalid year!")
            pause_me = input("Press any key to continue")
        else:
            repeat = 0
    repeat = 1
    while repeat == 1:
        selected_month = input("Enter a month as a number: ")
        if not selected_month.isdigit():
            print("Invalid month!")
            pause_me = input("Press any key to continue")
        elif int(selected_month) < 1 or int(selected_month) > 12:
            print("Invalid month!")
            pause_me = input("Press any key to continue")
        else:
            repeat = 0
    if len(selected_month) == 1:
        selected_month = '0' + selected_month
    else:
        pass
    date_string = selected_year + '-' + selected_month + '-' ;
    db_response = date_query(cur, date_string)
    print_db_results(db_response)
    db.close()
예제 #10
0
def top_10_by_year(db_name):
    os_stuff.clear()
    db = db_tools.sql_connection(db_name)
    print('opening data...')
    cur = db.cursor()
    cur.execute('SELECT min(WeekID) FROM music')
    min_year = int(cur.fetchone()[0][:4])
    cur.execute('SELECT max(WeekID) FROM music')
    max_year = int(cur.fetchone()[0][:4])
    os_stuff.clear()
    print("DB contains data from " + str(min_year) + " to " + str(max_year) + ".")
    repeat = 1
    while repeat == 1:
        selected_year = input("Enter a year to explore: ")
        if int(selected_year) < min_year or int(selected_year) > max_year:
            print("Invalid year!")
            pause_me = input("Press ENTER key to continue")
        else:
            repeat = 0
    db_response = date_query(cur, selected_year)
    print_db_results(db_response)
    db.close()