def main():
    """Append Hot 100 chart rows to ``./Datasets/Billboard/ContinuousChart.csv``.

    Starts from the chart requested for 2017-12-30 and follows the
    ``previousDate`` links backwards. The walk stops when the previous date
    contains ``'2016'`` or when no previous date is reported. Every entry of
    each fetched chart becomes one CSV row:
    date, title, artist, rank, weeks, isNew flag ('1' or '0').

    The header row is written only when the output file is empty, so the
    function can be re-run to keep appending to the same file.
    """
    header = ["Date", "Title", "ArtistName", "Rank", "Weeks", "isNew"]
    with open('./Datasets/Billboard/ContinuousChart.csv', 'a+') as outputFile:
        writer = csv.writer(outputFile, delimiter=',', lineterminator='\n')

        # Seek to the end explicitly (whence=2) so that offset 0 reliably
        # means "file is empty" and the header is needed.
        outputFile.seek(0, 2)
        if outputFile.tell() == 0:
            writer.writerow(header)

        chart = billboard.ChartData('hot-100', date='2017-12-30')
        # Guard against a missing previousDate: `in` on None would raise.
        while chart.previousDate and '2016' not in chart.previousDate:
            chart = billboard.ChartData('hot-100', date=chart.previousDate)
            print(chart.previousDate, '\n', chart[:5])
            for s in chart:
                # Commas are replaced so free-text fields stay comma-free.
                song = Song(s.title.replace(",", " ").lower())
                song.new = '1' if s.isNew else '0'
                song.artistName = s.artist.replace(",", " ").lower()
                song.rank = s.rank
                song.weeks = s.weeks
                # NOTE(review): rows are stamped with the chart's
                # previousDate rather than chart.date - confirm intended.
                song.date = chart.previousDate
                writer.writerow([
                    song.date, song.title, song.artistName, song.rank,
                    song.weeks, song.new
                ])
            # Pause between requests to the chart site.
            time.sleep(10)
def billb(num, s_a):
    """Print and return the top ``num`` entries of a Billboard chart.

    Args:
        num: Number of entries to list; converted with ``int()``.
        s_a: ``'songs'`` to list the ``hot-100`` chart (title and artist) or
            ``'artists'`` to list the ``artist-100`` chart (artist only).

    Returns:
        One ``"<position> - ..."`` line per entry, each terminated by a
        newline. An empty string is returned when ``s_a`` is neither of the
        two supported values or when ``num`` is not positive.
    """
    # Negative counts list nothing, matching the original counting loop.
    count = max(int(num), 0)

    if s_a == 'songs':
        chart = billboard.ChartData('hot-100')
        # Slicing caps the listing at the chart length instead of raising
        # IndexError when more entries are requested than exist.
        lines = [
            str(pos) + " - " + entry.title + " -- " + entry.artist
            for pos, entry in enumerate(chart[:count], start=1)
        ]
    elif s_a == "artists":
        chart = billboard.ChartData('artist-100')
        lines = [
            str(pos) + " - " + entry.artist
            for pos, entry in enumerate(chart[:count], start=1)
        ]
    else:
        # Previously this path hit an unbound local at `return output`.
        lines = []

    for line in lines:
        print(line)
    return "".join(line + "\n" for line in lines)
def read(self, range_tracker):
    """
    Implements the method `apache_beam.io.iobase.BoundedSource.read`.
    Scrapes charts from the Billboard.com website via the Billboard.py.

    Yields ``(year, "<title> - <artist>")`` tuples, where ``year`` is the
    first four characters of the current chart's ``previousDate``. The walk
    goes backwards through ``previousDate`` links and stops when that year is
    no longer greater than ``LAST_YEAR``, when no previous date is reported,
    or when fetching the next chart fails.

    ``range_tracker`` is accepted for interface compatibility and not used.
    """
    self.logger.info('Scraping Billboard.com charts.')
    chart = billboard.ChartData(CHART, date=START_DATE)
    self.logger.info('Scraping Billboard.com %s chart data since year %s',
                     CHART,
                     chart.previousDate[:4] if chart.previousDate else None)

    # Test the date itself: a slice is never None, so the original
    # `previousDate[:4] is not None` check could not stop the loop and a
    # missing date raised TypeError instead.
    while chart.previousDate and int(chart.previousDate[:4]) > LAST_YEAR:
        year = chart.previousDate[:4]
        self.logger.info("Scraping chart %s for year %s", CHART,
                         chart.previousDate)
        for track in chart:
            yield (year, track.title + ' - ' + track.artist)
        try:
            chart = billboard.ChartData(CHART, chart.previousDate)
        except Exception:
            # Best effort: stop scraping, but record why rather than
            # discarding the error silently.
            self.logger.exception('Failed to fetch %s chart for %s; stopping.',
                                  CHART, chart.previousDate)
            break
def gatherBillboardData():
    """Collect Hot 100 chart data for up to 960 charts into one DataFrame.

    A chart is fetched without a date and its ``previousDate`` is overridden
    with "2002-07-06", so every chart after the first one is reached by
    walking backwards from that date. Each chart is handed to
    ``getChartDataFrame`` together with the frame accumulated so far. The
    loop ends after 960 charts or when no previous date is reported.

    Returns:
        The accumulated ``pandas.DataFrame``.
    """
    column_names = [
        "name", "artist", "song_id", "danceability", "energy", "loudness",
        "mode", "speechiness", "acousticness", "instrumentalness", "liveness",
        "valence", "tempo", "duration_ms", "time_signature", "sections",
        "target", "chart_date", "popularity", "release_date", "weeks",
        "artist_popularity", "artist_followers", "number_of_artists",
        "list_of_artists", "key"
    ]

    chart = billboard.ChartData('hot-100')
    chart.previousDate = "2002-07-06"
    fullDataFrame = pd.DataFrame(columns=column_names)

    started_at = time.time()
    # Collecting data from the last 960 weeks
    for week_number in range(1, 961):
        if not chart.previousDate:
            break
        print(chart.previousDate)
        fullDataFrame = getChartDataFrame(chart, fullDataFrame)
        chart = billboard.ChartData('hot-100', chart.previousDate)
        if week_number == 480:
            # Progress marker at the halfway point.
            print("halveis")
    finished_at = time.time()

    print("total time: " + str(finished_at - started_at))
    return fullDataFrame
def get_billboard_chart():
    """Combine three Billboard charts into one de-duplicated DataFrame.

    Fetches the charts named by ``HOT_100``, ``TOP_HIPHOP`` and ``TOP_EDM``,
    converts each to a DataFrame with ``get_song_df``, and removes
    duplicates by ``full_title`` with this precedence: Hot 100 first, then
    EDM, then hip-hop.

    Returns:
        The concatenated DataFrame sorted by its ``rank`` column.
    """
    hot_100 = billboard.ChartData(HOT_100)
    top_hiphop = billboard.ChartData(TOP_HIPHOP)
    top_edm = billboard.ChartData(TOP_EDM)

    # Pass `.entries` for all three charts; the hip-hop chart used to be
    # passed as the ChartData object itself.
    top_100_df = get_song_df(hot_100.entries)
    top_edm_df = get_song_df(top_edm.entries)
    top_hiphop_df = get_song_df(top_hiphop.entries)

    # `Series.get_values()` no longer exists in pandas >= 1.0; `isin` gives
    # the same membership test without a Python-level loop. `.values` keeps
    # the mask positional, as the original list masks were.
    in_hot_100 = top_edm_df.full_title.isin(top_100_df.full_title).values
    top_edm_df = top_edm_df[~in_hot_100]

    in_hot_100 = top_hiphop_df.full_title.isin(top_100_df.full_title).values
    top_hiphop_df = top_hiphop_df[~in_hot_100]

    # Compare against the EDM frame only after it has been filtered.
    in_edm = top_hiphop_df.full_title.isin(top_edm_df.full_title).values
    top_hiphop_df = top_hiphop_df[~in_edm]

    concat_billboard_charts = pd.concat(
        [top_100_df, top_edm_df, top_hiphop_df]).sort_values(by='rank')
    return concat_billboard_charts
def get_longest_no_one():
    """Print the Hot 100 number-one entry with the largest ``weeks`` value.

    Walks backwards through the charts via ``previousDate``. On each chart
    the first entry is examined; it is skipped when it has the same title as
    the first entry of the chart examined just before it, otherwise its
    ``weeks`` value is compared against the best seen so far.

    Prints the best entry and its ``weeks`` value; returns nothing.
    """
    chart = billboard.ChartData('hot-100')
    prev = None
    current = chart[0]
    top = 0
    current_top = ""

    # NOTE(review): the loop ends once a chart without previousDate is
    # reached, so the first entry of that last chart is never compared.
    while chart.previousDate:
        # `prev` starts as None; the original used "" and only worked
        # because `"".title` happens to be a str method.
        same_as_prev = prev is not None and current.title == prev.title
        if not same_as_prev and current.weeks > top:
            top = current.weeks
            current_top = current

        # Every branch of the original advanced the same way.
        prev = current
        chart = billboard.ChartData('hot-100', chart.previousDate)
        current = chart[0]

    # A single pre-formatted string prints identically on Python 2 and 3.
    print("%s Weeks:  %s" % (current_top, top))
def get_chart_entries(playlist, date):
    """Fetch ``playlist`` for ``date``, moving forward a day at a time until
    a chart with at least one entry is returned.

    Args:
        playlist: Chart name passed to ``billboard.ChartData``.
        date: Start date; it is passed through ``str()`` and must support
            adding a ``timedelta`` if a retry is needed.

    Returns:
        A ``(chart, offset)`` tuple, where ``offset`` is the ``timedelta``
        that was added to ``date`` to reach the returned chart.
    """
    one_day = timedelta(days=1)
    offset = timedelta(days=0)

    chart = billboard.ChartData(playlist, str(date))
    while not len(chart.entries):
        offset = offset + one_day
        shifted_date = date + offset
        chart = billboard.ChartData(playlist, str(shifted_date))

    return chart, offset
def testDateRounding(self):
    """Checks that requested dates are rounded up to the nearest date on
    which a chart was published.
    """
    cases = (
        ("1000-10-10", "1958-08-04"),  # The first Hot 100 chart
        ("1996-07-31", "1996-08-03"),
    )
    for requested, expected in cases:
        chart = billboard.ChartData("hot-100", date=requested)
        self.assertEqual(chart.date, expected)
def test_date_rounding(self):
    """Checks that requested dates are rounded up to the nearest date on
    which a chart was published.
    """
    # A date far before any chart resolves to the first Hot 100 chart.
    earliest = billboard.ChartData('hot-100', date='1000-10-10')
    self.assertEqual(earliest.date, '1958-08-04')

    # A mid-week date resolves to the following chart date.
    midweek = billboard.ChartData('hot-100', date='1996-07-30')
    self.assertEqual(midweek.date, '1996-08-03')
def get_charts(date):
    """Yield one dict per entry of each Hot 100 chart preceding ``date``.

    Starting from the chart for ``date``, follows ``previousDate`` links
    backwards until none is reported. For every chart reached this way, each
    entry's attributes are yielded as a dict with a ``"date"`` key added
    (the chart's date) and the ``"image"`` key removed.

    Args:
        date: Date passed to ``billboard.ChartData`` for the starting chart.

    Yields:
        dict: A copy of the entry's attributes, so entries are not modified.
    """
    chart = billboard.ChartData('hot-100', date=date)
    # NOTE(review): the chart for `date` itself is used only as the starting
    # point and is never yielded - confirm this is intended.
    while chart.previousDate:
        chart = billboard.ChartData('hot-100', chart.previousDate)
        for entry in chart:
            # Copy: vars() returns the entry's own __dict__, and editing it
            # in place would add/remove attributes on the entry.
            song = dict(vars(entry))
            song["date"] = chart.date
            # Tolerate entries that carry no image attribute.
            song.pop("image", None)
            yield song
        print("Processed date ", chart.date)
def main():
    """Download Hot 100 charts backwards from 2018-02-24 and dump them to a
    JSON file named by the single command-line argument.

    Charts are collected via ``grab_data`` while walking ``previousDate``
    links. The walk stops at the chart whose previous date is '1958-08-04'
    (or that reports no previous date); that chart is included as the last
    element. The list is then written with ``create_json``.

    Exits with status 1 on a wrong argument count or when interrupted.
    """
    # Check command line argument number for correctness. Should only be 1
    if len(argv) != 2:
        print('Expected a file name as the only argument.')
        exit(1)

    # Parse the command line argument as the name of the file to save data to
    file_to_use = argv[1]

    # Make sure the name ends with .json (a substring test would accept
    # names such as "x.json.bak" unchanged).
    if not file_to_use.endswith('.json'):
        file_to_use += '.json'

    # Grab the starting chart
    current_chart = bd.ChartData('hot-100', date='2018-02-24')

    # Collected chart dicts, newest first
    data = []

    try:
        # Also stop when no previous date is reported, so the loop cannot
        # keep requesting charts with an empty date.
        # NOTE(review): the chart dated '1958-08-04' itself is never
        # fetched - confirm whether it should be included.
        while (current_chart.previousDate
               and current_chart.previousDate != '1958-08-04'):
            print('Grabbing chart data %s as a dict...\n' % current_chart.date)
            data.append(grab_data(current_chart))
            # Move to the previous chart
            current_chart = bd.ChartData('hot-100',
                                         current_chart.previousDate)

        # Add the final chart to the list
        print('Grabbing chart data %s as a dict...\n' % current_chart.date)
        data.append(grab_data(current_chart))

        # Dump that list of dicts into the file given
        print('Appending to .json file...\n')
        create_json(data, file_to_use)
        print('Complete!\n')
    except (EOFError, KeyboardInterrupt):
        print('\nInterrupted. Removing incomplete data file and exiting...\n')
        exit(1)
def populate_artists_and_songs(num_of_weeks):
    '''
    Can be used for testing purposes: populates the top_music.Artist/Song
    tables with exactly the same input drawn from Billboard.com, so
    "pull_week" always succeeds.

    Starting from the chart fetched without a date, processes up to
    ``num_of_weeks`` charts, moving backwards through ``previousDate``. For
    every entry the parsed artist name is inserted, looked up with
    ``validate_artist``, and the song is inserted with ``DUMMY_DATE``.
    '''
    chart = billboard.ChartData('hot-100')
    for week in range(num_of_weeks):
        for song in chart:
            artist_name = parse_artist_name(song.artist)
            insert_artist(artist_name, "", "1", "")
            artist_id = validate_artist(artist_name)
            insert_song(song.title, artist_id, DUMMY_DATE)

        # Do not fetch a chart that will never be processed, and stop when
        # there is no earlier chart to move to.
        if week == num_of_weeks - 1 or not chart.previousDate:
            break
        chart = billboard.ChartData('hot-100', chart.previousDate)
def testPreviousNext(self):
    """Checks that date, previousDate and nextDate come from the HTML and
    are not computed.

    Charts are not always published seven days apart; the two charts used
    here are adjacent yet more than a week apart.
    """
    later_date = "1962-01-06"
    earlier_date = "1961-12-25"

    later = billboard.ChartData("hot-100", date="1962-01-06")
    self.assertEqual(later.date, later_date)
    self.assertEqual(later.previousDate, earlier_date)

    earlier = billboard.ChartData("hot-100", date="1961-12-25")
    self.assertEqual(earlier.date, earlier_date)
    self.assertEqual(earlier.nextDate, later_date)
def scrapeDataForYear(year, conn, onYearDone):
    """Save Hot 100 charts for ``year``, walking backwards in time.

    The walk starts at the chart for ``getFinalDate(year)`` and follows
    ``previousDate``. It stops when a chart has no previous date or when
    ``getPreviousYear(year)`` occurs in the chart's date. Each chart reached
    before that is saved with ``saveChart(chart, conn)``.

    NOTE(review): ``onYearDone`` is invoked after every saved chart, not
    once at the end - confirm this matches the callback's intent.
    """
    chart = billboard.ChartData('hot-100',
                                date=getFinalDate(year),
                                fetch=True,
                                timeout=30)
    prevYear = getPreviousYear(year)

    while True:
        reached_end = (not chart.previousDate) or (prevYear in chart.date)
        if reached_end:
            break
        saveChart(chart, conn)
        onYearDone()
        # Pause before the next request.
        time.sleep(10)
        chart = billboard.ChartData('hot-100', chart.previousDate, timeout=45)
def scrape_billboard(number_of_weeks):
    """
    Scrape Hot 100 charts and enrich each unique track with Spotify data.

    input  - number of weeks to scrape into the past
    output - list of dictionaries, one per unique track that has a Spotify
             ID, with 'artist', 'title' and 'track_id' plus whatever the
             spotify_utils ``set_*`` helpers add
    """
    from spotify_utils import (create_auth, set_track_data, set_album_data,
                               set_artist_data, set_audio_features)
    from utils import write_list_of_dictionaries_to_file
    import billboard
    try:
        import Tkinter  # Python 2 module name
    except ImportError:
        import tkinter as Tkinter  # Python 3 module name

    chart = billboard.ChartData(name='hot-100')
    data = []
    seen_ids = set()  # set membership is O(1); tracks repeat for weeks
    count = 0
    sp = create_auth()

    # `<` rather than `!=` so a negative or fractional count terminates.
    while count < number_of_weeks:
        print("Getting data from: " + chart.date)
        for track in chart:
            # Add only unique songs since many tracks stay on the chart
            # for weeks; entries without a Spotify ID are skipped.
            if not track.spotifyID or track.spotifyID in seen_ids:
                continue
            seen_ids.add(track.spotifyID)

            song = {
                'artist': track.artist,
                'title': track.title,
                'track_id': str(track.spotifyID),
            }
            try:
                set_track_data(song, sp)
                set_album_data(song, sp)
                set_artist_data(song, sp)
                set_audio_features(song, sp)
            except Exception:
                # `Exception` instead of a bare except so Ctrl-C still
                # stops the run. If a Spotify error occurs, refresh the
                # token and back up what has been collected so far.
                sp = create_auth()
                write_list_of_dictionaries_to_file(
                    data, "BillboardBackupAt" + str(count) + ".csv")
                continue
            data.append(song)

        chart = billboard.ChartData(name='hot-100', date=chart.previousDate)
        count += 1

    # Notify me when done
    Tkinter.Tk().bell()
    return data
def unique_top_tracks_generator():
    """Collect the unique Hot 100 tracks for each year from 1958 to 2020.

    For each year the chart for December 31 is fetched, then charts are
    walked backwards while the current chart's ``previousDate`` still falls
    in that year. Every ``(title, artist)`` pair seen is recorded once.

    Returns:
        dict mapping each year (int) to a list of unique
        ``(title, artist)`` tuples, in no particular order.
    """
    track_dict = {}
    for year in range(1958, 2021):
        year_str = str(year)
        seen_tracks = set()
        chart = billboard.ChartData("hot-100", date=year_str + "-12-31")

        # A chart with no previousDate ends the walk; slicing None (as
        # happens at the earliest chart) would raise TypeError.
        # NOTE(review): the chart whose previousDate falls in another year
        # is not processed - confirm that week should be excluded.
        while chart.previousDate and chart.previousDate[:4] == year_str:
            print(chart.previousDate)
            # The set removes duplicates on its own; the old
            # `title in seen_tracks` test compared a string against tuples
            # and was always False.
            seen_tracks.update(
                (track.title, track.artist) for track in chart)
            time.sleep(THROTTLE_TIME)
            chart = billboard.ChartData("hot-100", chart.previousDate)

        track_dict[year] = list(seen_tracks)
    return track_dict
def get_songs(songs, chart_name):
    """Fill ``songs`` with title -> artist pairs from ``chart_name`` charts.

    Walks backwards from the chart fetched without a date until a chart has
    no ``previousDate`` or its date starts with '1979'. When fetching the
    previous chart raises a ``requests`` error, the same chart is processed
    again after a pause.

    Args:
        songs: Mapping updated in place; a later artist overwrites an
            earlier one for the same title.
        chart_name: Chart name passed to ``billboard.ChartData``.
    """
    chart = billboard.ChartData(chart_name)
    while chart.previousDate and not chart.date.startswith('1979'):
        for entry in chart:
            songs[entry.title] = entry.artist

        try:
            chart = billboard.ChartData(chart_name, chart.previousDate)
        except requests.exceptions.RequestException:
            # Keep the current chart; the next iteration retries.
            pass
        else:
            print(chart)
        # Both the success and the retry path wait before the next request.
        time.sleep(10)
def extract_billboard_top_100(previousDate):
    """Append Hot 100 charts older than ``previousDate`` to the global ``ll``.

    Starting from the chart for ``previousDate``, follows ``previousDate``
    links backwards while the previous chart's year is '1990' or later. Each
    chart fetched on the way is appended to ``ll`` as a list of
    ``(title, artist, chart_date)`` tuples.

    If an error interrupts the walk, it is restarted from the last chart
    that was fetched, for at most 10000 attempts. The function returns as
    soon as one walk completes.

    Args:
        previousDate: Date string of the chart to start from.
    """
    for _attempt in range(10000):
        chart = billboard.ChartData('hot-100', previousDate)
        try:
            # Stop cleanly when no previous chart is reported instead of
            # failing on `None[:4]` and retrying.
            while chart.previousDate and chart.previousDate[:4] >= '1990':
                time.sleep(2)
                chart = billboard.ChartData('hot-100', chart.previousDate)
                print("Correct", chart.date)
                ll.append([(entry.title, entry.artist, chart.date)
                           for entry in chart])
        except Exception:
            # `Exception` rather than a bare except so Ctrl-C still works.
            # Resume from the last chart that was fetched successfully.
            print(chart.date)
            previousDate = chart.date
        else:
            # Finished without error: do not start over and re-append
            # everything on the next attempt.
            return
def makefile(iniyear, endyear, filepath):
    """Write 'r-b-hip-hop-songs' charts to a text file, oldest first.

    Starts at the chart for January 1 of ``iniyear`` and follows
    ``nextDate`` forward. Charts are written until the next chart's year
    equals ``endyear`` or no next date is reported. Each chart is written as
    a ``Date:`` header followed by one ``title | artist | weeks`` line per
    entry.

    Args:
        iniyear: First year, formatted into ``'<iniyear>-01-01'``.
        endyear: Year at which to stop; may be an int or a string.
        filepath: Output path; the file is overwritten.
    """
    # nextDate parts are strings, so compare against a string; an int
    # `endyear` would otherwise never match.
    end_year = str(endyear)

    # `with` closes the file even if a request fails part-way through.
    with open(filepath, 'w') as f:
        chart = billboard.ChartData('r-b-hip-hop-songs',
                                    '{}-01-01'.format(iniyear))
        # Also stop when there is no next chart (nextDate is empty).
        while chart.nextDate and chart.nextDate.split("-")[0] != end_year:
            f.write("Date: {}\n\n".format(chart.date))
            for songs in chart:
                f.write("{} | {} | {} \n".format(songs.title, songs.artist,
                                                 songs.weeks))
            # Pause between chart requests.
            time.sleep(5)
            f.write("\n\n")
            chart = billboard.ChartData('r-b-hip-hop-songs', chart.nextDate)
def main():
    """Print the Hot 100 chart date, a separator line, and then the
    attribute dict of every entry on the chart.
    """
    # Full call form noted by the original author, for reference:
    # billboard.ChartData('hot-100', date=None, fetch=True, max_retries=5, timeout=25)
    hot100 = billboard.ChartData('hot-100')

    banner = ("Billboard hot-100 chart from " + hot100.date +
              "\n---------------------------------------")
    print(banner)

    for entry in hot100:
        print(vars(entry))
def getChart(site, genre, date):
    """Return the unique lead-artist names on a chart.

    Args:
        site: ``'billboard'`` or ``'gaon'``; anything else prints a message
            and exits.
        genre: Chart name, used for the Billboard lookup and the log line.
        date: Chart date as a ``'%Y-%m-%d'`` string.

    Returns:
        The result of ``np.unique`` over the cleaned names. Cleaning removes
        everything from ``Featuring``, `` x ``/`` X `` or `` & `` onwards and
        strips surrounding whitespace.
    """
    print(f'{genre} - {date}')

    if site == 'billboard':
        chart = billboard.ChartData(genre, date=date)
        artists = [song.artist for song in chart]
    elif site == 'gaon':
        ranking = gaon.week(day_time=datetime.strptime(date, '%Y-%m-%d'))
        artists = [rank['artist'] for rank in ranking]
    else:
        print('Site must be billboard, gaon, or oricon')
        exit()

    # Strip after removing the collaborator suffix. The original called
    # `map(str.strip, artists)` and discarded the result, so "A " (left over
    # from "A Featuring B") and "A" counted as two different artists.
    cleaned = [
        re.sub('Featuring.*| [xX] .*| & .*', '', artist).strip()
        for artist in artists
    ]
    return np.unique(cleaned)
def get_billboard_list(self):
    """Append the chart entries for every date in ``self.get_dates()`` to
    the CSV file at ``self.path + self.billboard_result_path``.

    A header row (title, artist, retrieve_date, rank, weeks) is written
    first when the file does not exist yet. One row is written per chart
    entry, with the requested date in the ``retrieve_date`` column.
    """
    result_path = self.path + self.billboard_result_path
    date_list = self.get_dates()
    file_exists = os.path.isfile(result_path)

    # One handle for the whole run, closed by `with`; the original opened a
    # new handle for every date and never closed it.
    with open(result_path, 'a') as f:
        writer_top = csv.writer(f, delimiter=',', quoting=csv.QUOTE_MINIMAL)
        if not file_exists:
            writer_top.writerow(
                ['title', 'artist', 'retrieve_date', 'rank', 'weeks'])

        for d in date_list:
            print(d)
            chart = billboard.ChartData(self.chart, date=d)
            # Write every entry; `range(0, len(chart) - 1)` skipped the
            # last one.
            writer_top.writerows(
                [entry.title, entry.artist, d, entry.rank, entry.weeks]
                for entry in chart.entries)
            # Keep completed dates on disk if a later request fails.
            f.flush()
def TrackList(spotifyObject):
    """Look up Spotify track IDs for the entries of the Hot 100 chart.

    Artist names are simplified before searching: "Featuring" becomes
    "feat", "&" is removed, and the text "Swae Lee Or" is removed. Each
    ``"<title> <artist>"`` query is sent to
    ``spotifyObject.search(query, 1, 0, "track")`` and the ID of the first
    result is kept.

    Args:
        spotifyObject: Client exposing ``search``.

    Returns:
        List of track IDs in chart order. Entries for which the search
        returns no items are skipped, so the list may be shorter than the
        chart.
    """
    cleanAry = []
    chart = billboard.ChartData('hot-100')
    for song in chart:
        artist = song.artist
        if "Featuring" in artist:
            artist = artist.replace("Featuring", "feat")
            if "Swae" in artist:
                print("---------------------------")
                artist = artist.replace("Swae Lee Or", "")
        elif "&" in artist:
            artist = artist.replace("&", "")
        elif "Swae" in artist:
            artist = artist.replace("Swae Lee Or", "")
        cleanAry.append((song.title, artist))

    trackAry = []
    for title, artist in cleanAry:
        query = title + " " + artist
        print(query)
        searchResults = spotifyObject.search(query, 1, 0, "track")
        items = searchResults['tracks']['items']
        if not items:
            # No match: skip rather than fail with IndexError on items[0].
            continue
        trackAry.append(items[0]['id'])
    return trackAry
def test_datetime_date():
    """Checks that ChartData accepts a ``datetime.date`` object as the date
    parameter.
    """
    requested = datetime.date(2016, 7, 9)
    chart = billboard.ChartData('hot-100', requested)

    entry_count = len(chart)
    assert entry_count == 100
    assert chart.date == '2016-07-09'
def music_api():
    """Render the Hot 100 chart as an HTML ordered list.

    Returns:
        A ``Markup`` value of the form
        ``<ol><li>title - artist</li><br><br>...</ol>``.
    """
    chart = billboard.ChartData('hot-100')

    # Titles and artists come from an external site and may contain
    # characters such as "&" or "<". Formatting through a Markup template
    # HTML-escapes the arguments instead of marking raw text as safe.
    # NOTE(review): assumes `Markup` is the MarkupSafe/Flask class - confirm.
    item_template = Markup("<li>{} - {}</li><br><br>")
    items = Markup("").join(
        item_template.format(song.title, song.artist) for song in chart)
    return Markup("<ol>") + items + Markup("</ol>")
def testDatetimeDate(self):
    """Checks that ChartData accepts a ``datetime.date`` object as the date
    parameter.
    """
    requested = datetime.date(2016, 7, 9)
    chart = billboard.ChartData("hot-100", requested)

    entry_count = len(chart)
    self.assertEqual(entry_count, 100)
    self.assertEqual(chart.date, "2016-07-09")
def get_data(self):
    """Fetch each chart in ``self.chart_name_list`` for every day from
    ``self.start_date`` up to, but not including, ``self.end_date``, and
    write the results as JSON and CSV files.

    Prints a message and exits when the start date is after the end date or
    when the two dates are more than 30 days apart. Output file names
    contain the chart name, both dates and the current timestamp.

    NOTE(review): ``range(diff_days)`` leaves out the end date itself, so
    equal start and end dates fetch nothing - confirm this is intended.
    """
    date_format = '%Y-%m-%d'
    output_dir = '/var/www/msg/api/billboard-charts/source/'

    st_dt = datetime.datetime.strptime(self.start_date, date_format)
    ed_dt = datetime.datetime.strptime(self.end_date, date_format)
    if st_dt > ed_dt:
        print("Start date should not be greater than end date")
        sys.exit()

    diff_days = (ed_dt - st_dt).days
    if diff_days > 30:
        print("Days should not exceed more than 30")
        sys.exit()

    for chart_name in self.chart_name_list:
        name_prefix = (output_dir + chart_name + '_' + self.start_date + '_' +
                       self.end_date + '_')
        for day_offset in range(diff_days):
            current_dt = st_dt + datetime.timedelta(days=day_offset)
            formatted_dt = current_dt.strftime(date_format)
            chart_data = billboard.ChartData(chart_name, formatted_dt)

            parsed_data = self.parse_data(chart_data, chart_name)
            json_name = name_prefix + str(time.time()) + '.json'
            self.write_to_json_file(json_name, parsed_data)

            parsed_data_csv = self.csv_data(chart_data, chart_name)
            csv_name = name_prefix + str(time.time()) + '.csv'
            self.write_to_csv_file(csv_name, chart_name, parsed_data_csv)
def get_lastfm_input_artist(self, artist_name: str) -> Optional[dict]:
    """Look up ``artist_name`` on the 'artist-100' chart.

    Returns:
        A dict with the keys artist, image, isNew, lastPos, peakPos, rank,
        title and weeks. Values come from the first chart entry whose
        ``artist`` equals ``artist_name``; every value is "-" when no entry
        matches. ``None`` is returned if any exception occurs, after
        printing the exception's type, args and message.
    """
    field_names = ("artist", "image", "isNew", "lastPos", "peakPos", "rank",
                   "title", "weeks")
    try:
        entries = billboard.ChartData('artist-100').entries
        matches = [entry for entry in entries if entry.artist == artist_name]
        if not matches:
            return {name: "-" for name in field_names}
        first_match = matches[0]
        return {name: getattr(first_match, name) for name in field_names}
    except Exception as e:
        for detail in (type(e), e.args, e):
            print(detail)
        return None
def getchart(name, date):
    '''
    Fetch chart ``name`` for ``date`` and record every song in CORPUS.

    ``date`` must provide a ``date()`` method; its string form is both the
    date sent to Billboard and the value stored. Keys are
    ``title + SEPARATOR + artist``.
    '''
    str_date = str(date.date())
    chart = billboard.ChartData(name, date=str_date, fetch=True)

    song_count = len(chart)
    logger.info("Fetched {} songs".format(song_count))

    for song in chart:
        corpus_key = song.title + SEPARATOR + song.artist
        CORPUS.update({corpus_key: str_date})
def create_playlist_from_chart(chart_id, chart_name, num_songs_phrase,
                               web_url):
    """Create a playlist from the Billboard chart ``chart_id`` and fill it
    with the chart's entries.

    The playlist title is ``"<chart_name> - <Month DD, YYYY>"``, using the
    chart's date. If a playlist with that title already exists, a message
    is printed and nothing is created.

    Args:
        chart_id: Chart name passed to ``billboard.ChartData``.
        chart_name: Human-readable chart name used in title and description.
        num_songs_phrase: Text inserted directly before "songs " in the
            description.
        web_url: URL appended to the description.
    """
    # Get the songs from the Billboard web page
    chart = billboard.ChartData(chart_id)
    parsed_date = datetime.strptime(chart.date, '%Y-%m-%d')
    chart_date = parsed_date.strftime("%B %d, %Y")

    pl_title = "{0} - {1}".format(chart_name, chart_date)
    pl_description = ("This playlist contains the " + num_songs_phrase +
                      "songs " + "in the Billboard " + chart_name +
                      " Songs chart for the " + "week of " + chart_date +
                      ". " + web_url)

    # Only create the playlist when no playlist has the same title.
    if not playlist_exists_with_title(pl_title):
        pl_id = create_new_playlist(pl_title, pl_description)
        add_chart_entries_to_playlist(pl_id, chart.entries)
        return

    print("Playlist already exists with title '" + pl_title + "'. " +
          "Delete it manually and re-run the script to recreate it.")
    return