def stats_by_artist_overall(db_name): os_stuff.clear() db = db_tools.sql_connection(db_name) print('opening data...') artist_repeat = 1 while artist_repeat == 1: selected_artist = input("Enter a SOLO ARTIST or a BAND: ") # substr(song, 0, 25) as "Song", db_response = pd.read_sql_query(f''' SELECT id, substr(performer, 0, 25) as "Performer", avg(week_position) as "Average Position", min(peak_position) as "Best Position", sum(weeks_on_chart) as "Total Wks on Chrt", min(weekid) as "First Appearance", max(weekid) as "Last Appearance" FROM music WHERE performer LIKE "%{selected_artist}%" GROUP BY performer ''', db) os_stuff.clear() if len(db_response) != 0: artist_repeat = 0 else: print('No results found') pause_me = input('Press ENTER to Continue') pass pd.set_option('display.max_rows', None) artist_data = pd.DataFrame(db_response) artist_data = artist_data.set_index('id') print(artist_data) pause_me = input('Press ENTER to continue.') db.close()
def read_print_csv(data_file): os_stuff.clear() print('TASK 1 - READ A CSV FILE INTO MEMORY') print('-------------------------------------') print(' ') print(f'The file {data_file} will be read from the current directory and each line added to a list, ' f'which will, then, be returned') print(' ') pause_me = input('Press ENTER key to begin.') print(' ') print('Converting date formats in the CSV to YEAR-MONTH-DAY. This will take a moment.') print('Reading CSV') my_data_list = pd.read_csv(data_file) print('Converting Data to DateFrame') music_df = pd.DataFrame(my_data_list) print('Converting Date format') music_df['WeekID'] = pd.to_datetime(music_df['WeekID']) music_df['WeekID'] = music_df['WeekID'].dt.strftime('%Y-%m-%d') print(' ') print('The data has been read from CSV & loaded into the list') print(f'There are {len(my_data_list)} records.') pause_me = input('Press ENTER to verify this? ') os_stuff.clear() print(music_df.head()) pause_me = input('Press ENTER key to continue to the next step.') return music_df
def load_data_into_db(db_name, music_df): os_stuff.clear() print( "Now, we'll load that data into the a Pandas DataFrame and then send it into a SQLite DB." ) print(" ") pause_me = input("Press ENTER key to continue.") def insert_data(db, music_df): # music_df = pd.DataFrame(data_list) music_df.columns = music_df.columns\ .str.strip()\ .str.lower()\ .str.replace(' ', '_')\ .str.replace('(', '')\ .str.replace(')', '') music_df.to_sql('music', con=db, if_exists='replace', index_label='id') db = db_tools.sql_connection(db_name) print('Putting the data into the base') insert_data(db, music_df) db.close() print('') print("Whew... So, that's done.") pause_me = input('Press ENTER key to continue.')
def intro(db_name): os_stuff.clear() print("TASK 3 - QUERYING THE DB") print('------------------------') print(' ') print(f"In this step, you'll walk through pulling information from the DB file ({db_name})" f" in a structured manner.") print(' ') pause_me = input('Press ENTER key to begin.')
def query_by_artist(db_name): os_stuff.clear() db = db_tools.sql_connection(db_name) print('opening data...') cur = db.cursor() selected_artist = input("Enter a SOLO ARTIST or a BAND: ") ex_str = f'SELECT * FROM music WHERE performer LIKE "%{selected_artist}%" AND "week_position" <= 10 GROUP BY songid ORDER BY weekid ASC' cur.execute(ex_str) db_response = cur.fetchall() print_db_results(db_response) db.close()
def query_menu(db_name): repeat = 1 while repeat == 1: os_stuff.clear() print('******************** PICK A QUERY ********************') print('BASIC SQL QUERIES') print('01 - Songs in Top 10 for a given year') print('02 - Songs in Top 10 for a given year and month') print('03 - Songs in Top 10 for a given Artist') print('') print('MatPlotLib/Pandas QUERIES') print('11 - Plots Song Popularity over time') print('12 - Plots Artist Popularity over time') print('13 - Stats for Songs per Artist') print('14 - Stats for Artist') print('') print('99 - EXIT') print('') selection = input("Query number: ") if selection.isdigit(): if int(selection) == 1: top_10_by_year(db_name) repeat = 1 elif int(selection) == 2: top_10_by_yr_and_mo(db_name) repeat = 1 elif int(selection) == 3: query_by_artist(db_name) repeat = 1 elif int(selection) == 11: plot_song_pop_over_time(db_name) repeat = 1 elif int(selection) == 12: plot_artist_pop_over_time(db_name) repeat = 1 elif int(selection) == 13: stats_by_artist(db_name) repeat = 1 elif int(selection) == 14: stats_by_artist_overall(db_name) repeat = 1 elif int(selection) == 99: break else: pass else: pass print('GOOD BYE')
def plot_song_pop_over_time(db_name): os_stuff.clear() db = db_tools.sql_connection(db_name) print('opening data...') cur = db.cursor() artist_repeat = 1 while artist_repeat == 1: selected_artist = input("Enter a SOLO ARTIST or a BAND: ") ex_str = f'SELECT * FROM music WHERE performer LIKE "%{selected_artist}%" GROUP BY songid ORDER BY weekid ASC' cur.execute(ex_str) os_stuff.clear() db_response = cur.fetchall() if len(db_response) != 0: artist_repeat = 0 else: print('No results found') pause_me = input('Press ENTER to Continue') pass songs = [] menu_counter = 1 os_stuff.clear() for line in db_response: print(f'{menu_counter}. {line[4]} by {line[5]} ({line[2][:4]})') songs.append(line[4]) menu_counter += 1 repeat = 1 while repeat == 1: song_selection = input("Select a song by the number: ") if song_selection.isdigit(): if 0 < int(song_selection) < len(songs)+1: repeat = 0 else: pass else: pass song_index = int(song_selection) - 1 print(f"You chose {songs[song_index]}") ex_str = f'SELECT * FROM music WHERE performer LIKE "%{selected_artist}%" AND song LIKE "{songs[song_index]}" ORDER BY weekid ASC' cur.execute(ex_str) os_stuff.clear() db_response = cur.fetchall() dates = [] chart_positions = [] for line in db_response: #line_date = line[2] #dates.append(datetime.strptime(line_date, '%m/%d/%Y')) dates.append(line[2]) chart_positions.append(line[3]) dates, chart_positions = zip(*sorted(zip(dates, chart_positions))) pd.plotting.register_matplotlib_converters() plt.plot_date(dates, chart_positions, '-') plt.gca().invert_yaxis() plt.show() db.close()
def plot_artist_pop_over_time(db_name): os_stuff.clear() db = db_tools.sql_connection(db_name) print('opening data...') cur = db.cursor() artist_repeat = 1 while artist_repeat == 1: selected_artist = input("Enter a SOLO ARTIST or a BAND: ") search_string1 = f''' SELECT Performer, week_position as "Position", song as "Song", songid as "ID", min(weekid) as "Date", count(*) as "Count" FROM music WHERE Performer LIKE "{selected_artist}" AND week_position <= 10 GROUP BY songid ''' os_stuff.clear() cur.execute(search_string1) db_response1 = cur.fetchall() if len(db_response1) != 0: artist_repeat = 0 else: print('No results found') pause_me = input('Press ENTER to Continue') pass dates = [] songs = [] counts = [] db_df = pd.DataFrame(db_response1) pd.set_option('display.expand_frame_repr', False) os_stuff.clear() print(db_df) pause_me = input("ENTER") for line in db_response1: #line_date = line[4][:10] #dates.append(datetime.strptime(line_date, '%m/%d/%Y').date()) dates.append(line[4]) songs.append(line[2]) counts.append(line[5]) dates, songs, counts = zip(*sorted(zip(dates, songs, counts))) pd.plotting.register_matplotlib_converters() plt.plot_date(dates, counts, '-') plt.gca() plt.show() db.close() os_stuff.clear()
def top_10_by_yr_and_mo(db_name): os_stuff.clear() db = db_tools.sql_connection(db_name) print('opening data...') cur = db.cursor() cur.execute('SELECT min(WeekID) FROM music') min_year = int(cur.fetchone()[0][:4]) cur.execute('SELECT max(WeekID) FROM music') max_year = int(cur.fetchone()[0][:4]) os_stuff.clear() print("DB contains data from " + str(min_year) + " to " + str(max_year) + ".") repeat = 1 while repeat == 1: selected_year = input("Enter a year: ") if not selected_year.isdigit(): print("Invalid year!") pause_me = input("Press any key to continue") elif int(selected_year) < min_year or int(selected_year) > max_year: print("Invalid year!") pause_me = input("Press any key to continue") else: repeat = 0 repeat = 1 while repeat == 1: selected_month = input("Enter a month as a number: ") if not selected_month.isdigit(): print("Invalid month!") pause_me = input("Press any key to continue") elif int(selected_month) < 1 or int(selected_month) > 12: print("Invalid month!") pause_me = input("Press any key to continue") else: repeat = 0 if len(selected_month) == 1: selected_month = '0' + selected_month else: pass date_string = selected_year + '-' + selected_month + '-' ; db_response = date_query(cur, date_string) print_db_results(db_response) db.close()
def top_10_by_year(db_name): os_stuff.clear() db = db_tools.sql_connection(db_name) print('opening data...') cur = db.cursor() cur.execute('SELECT min(WeekID) FROM music') min_year = int(cur.fetchone()[0][:4]) cur.execute('SELECT max(WeekID) FROM music') max_year = int(cur.fetchone()[0][:4]) os_stuff.clear() print("DB contains data from " + str(min_year) + " to " + str(max_year) + ".") repeat = 1 while repeat == 1: selected_year = input("Enter a year to explore: ") if int(selected_year) < min_year or int(selected_year) > max_year: print("Invalid year!") pause_me = input("Press ENTER key to continue") else: repeat = 0 db_response = date_query(cur, selected_year) print_db_results(db_response) db.close()