self.genres = genres self.styles = styles # Updates a song with discogs artists, labels, genres, and styles information def update(self, artists, labels, genres, styles): self.atists = artists self.labels = labels self.genres = genres self.styles = styles if scrape_chart_data: # Grab charts from 1/1/2000 to 1/1/2012 as training data start_date = datetime.date(2000, 1, 1) #end_date = datetime.date(2000, 1, 14) end_date = datetime.date(2012,1,1) charts = chart_scraper.get_charts('hot-100', start_date, end_date) print('Charts returned') # Separate out all unique songs by combining all songs with same title and artist ce_to_songs = dict() for chart in charts: for entry in chart: entry_string = entry.title + ' - ' + entry.artist # Deal with Featuring index = entry.artist.find('Featuring') artist = entry.artist if index != -1: artist = entry.artist[:index-1] ce_to_songs[entry_string] = Song(entry.title, [artist], entry.weeks, entry.peakPos) pickle.dump(ce_to_songs, open('chart-data/chart_songs_train.pickle', 'wb'))
# Get and save raw charts from billboard
import pickle
import datetime
import chart_scraper

# Chart names and what each one holds:
#   hot-100: top 100 songs
#   billboard-200: top 200 albums
#   r-b-hip-hop-songs: top 25
#   pop-songs: top 20
#   country-songs: top 25
#   rock-songs: top 25
#   dance-electronic-songs: top 25
#   latin-songs: top 25
#   christian-songs: top 25

# Note: some charts do not have data for the entire range of dates.
chart_names = [
    'hot-100', 'billboard-200', 'r-b-hip-hop-songs', 'pop-songs',
    'latin-songs', 'country-songs', 'rock-songs', 'dance-electronic-songs',
    'christian-songs',
]

# Grab charts from 1/1/2000 to 11/1/2015. The range is the same for every
# chart, so it is computed once rather than on each loop iteration.
start_date = datetime.date(2000, 1, 1)
end_date = datetime.date(2015, 11, 1)

for chart_name in chart_names:
    charts = chart_scraper.get_charts(chart_name, start_date, end_date)
    print('Charts returned for ' + chart_name)

    # One pickle per chart: chart-data/<chart_name>_charts.pickle. The context
    # manager closes each file as soon as it is written; the previous bare
    # open() call left every handle open. open() does not create the
    # chart-data/ directory, so it must already exist.
    with open('chart-data/' + chart_name + '_charts.pickle', 'wb') as pickle_file:
        pickle.dump(charts, pickle_file)