def main():
    """Append weekly Hot 100 rows to ``ContinuousChart.csv``.

    Starting from the chart requested for 2017-12-30, repeatedly fetches the
    next-older chart, writes one CSV row per entry and pauses ten seconds.
    The walk ends once the current chart's ``previousDate`` contains
    ``'2016'``.
    """
    # Hard-coded off, so the header row below is never emitted.
    write_header = False
    with open('./Datasets/Billboard/ContinuousChart.csv', 'a+') as csv_file:
        rows = csv.writer(csv_file, delimiter=',', lineterminator='\n')
        if write_header:
            rows.writerow(
                ["Date", "Title", "ArtistName", "Rank", "Weeks", "isNew"])
        chart = billboard.ChartData('hot-100', date='2017-12-30')
        while '2016' not in chart.previousDate:
            # The older chart is fetched before anything is written, so the
            # seed chart itself never reaches the file.
            chart = billboard.ChartData('hot-100', date=chart.previousDate)
            print(chart.previousDate, '\n', chart[:5])
            for entry in chart:
                # Commas are blanked out so values cannot split a CSV column.
                song = Song(entry.title.replace(",", " ").lower())
                song.new = '1' if entry.isNew else '0'
                song.artistName = entry.artist.replace(",", " ").lower()
                song.rank = entry.rank
                song.weeks = entry.weeks
                # NOTE(review): rows carry previousDate rather than
                # chart.date -- confirm this is the intended label.
                song.date = chart.previousDate
                rows.writerow([
                    song.date, song.title, song.artistName, song.rank,
                    song.weeks, song.new
                ])

            # Throttle between chart requests.
            time.sleep(10)
def billb(num, s_a):
    """Print and return the top ``num`` entries of a current Billboard chart.

    Args:
        num: Number of entries to list; anything ``int()`` accepts. Values
            larger than the chart are clamped to the chart length instead of
            raising ``IndexError``.
        s_a: ``'songs'`` selects the ``hot-100`` chart, ``"artists"`` selects
            the ``artist-100`` chart.

    Returns:
        str: One ``"<position> - <description>"`` line per entry, each
        terminated by a newline. Any other ``s_a`` yields an empty string.
    """
    if s_a == 'songs':
        chart = billboard.ChartData('hot-100')

        def describe(entry):
            return entry.title + " -- " + entry.artist
    elif s_a == "artists":
        chart = billboard.ChartData('artist-100')

        def describe(entry):
            return entry.artist
    else:
        # Previously this fell through to `return output` with `output`
        # never assigned, raising UnboundLocalError.
        return ""

    lines = []
    for position in range(min(int(num), len(chart))):
        line = str(position + 1) + " - " + describe(chart[position])
        print(line)
        lines.append(line + "\n")
    return "".join(lines)
Beispiel #3
0
    def read(self, range_tracker):
        """
        Implements the method `apache_beam.io.iobase.BoundedSource.read`.

        Scrapes charts from the Billboard.com website via the Billboard.py.

        Walks backwards from the chart at ``START_DATE`` and yields one
        ``(year, "<title> - <artist>")`` tuple per track, where ``year`` is
        the first four characters of the chart's ``previousDate``. Stops when
        that year is no longer greater than ``LAST_YEAR``, when there is no
        previous chart, or when fetching the previous chart fails.

        ``range_tracker`` is accepted for interface compatibility and is not
        used.
        """
        self.logger.info('Scraping Billboard.com charts.')

        chart = billboard.ChartData(CHART, date=START_DATE)
        previous_date = chart.previousDate

        self.logger.info('Scraping Billboard.com %s chart data since year %s',
                         CHART, previous_date[:4] if previous_date else None)

        # A missing previousDate ends the walk; slicing None used to raise
        # TypeError here because the old `is not None` test ran on the slice.
        while previous_date and int(previous_date[:4]) > LAST_YEAR:
            self.logger.info("Scraping chart %s for year %s", CHART,
                             previous_date)

            year = previous_date[:4]
            for track in chart:
                yield (year, track.title + ' - ' + track.artist)

            try:
                chart = billboard.ChartData(CHART, previous_date)
            except Exception:
                # Best effort: keep what was scraped so far, but leave a
                # trace instead of stopping silently.
                self.logger.exception(
                    'Failed to fetch chart %s for %s; stopping.', CHART,
                    previous_date)
                break
            previous_date = chart.previousDate
Beispiel #4
0
def gatherBillboardData():
    """Accumulate chart rows into one DataFrame over at most 960 charts.

    The current Hot 100 chart is fetched and its ``previousDate`` is
    overwritten with ``"2002-07-06"``, so that chart is processed first and
    the walk then continues backwards from 2002-07-06. Each chart is passed
    to ``getChartDataFrame`` together with the frame built so far.

    Returns:
        The DataFrame returned by the last ``getChartDataFrame`` call (or the
        empty frame if no chart was processed).
    """
    feature_columns = [
        "name", "artist", "song_id", "danceability", "energy", "loudness",
        "mode", "speechiness", "acousticness", "instrumentalness", "liveness",
        "valence", "tempo", "duration_ms", "time_signature", "sections",
        "target", "chart_date", "popularity", "release_date", "weeks",
        "artist_popularity", "artist_followers", "number_of_artists",
        "list_of_artists", "key"
    ]

    chart = billboard.ChartData('hot-100')
    chart.previousDate = "2002-07-06"
    fullDataFrame = pd.DataFrame(columns=feature_columns)

    started = time.time()
    # Collect data from at most 960 weekly charts.
    for week in range(1, 961):
        if not chart.previousDate:
            break
        print(chart.previousDate)
        fullDataFrame = getChartDataFrame(chart, fullDataFrame)
        chart = billboard.ChartData('hot-100', chart.previousDate)
        if week == 480:
            # Progress marker at the halfway point.
            print("halveis")
    finished = time.time()

    print("total time: " + str(finished - started))
    return fullDataFrame
def get_billboard_chart():
    """Merge the Hot 100, EDM and hip-hop charts into one de-duplicated frame.

    Each chart's entries are converted with ``get_song_df``. A song, matched
    on the ``full_title`` column, is kept only once with this priority:
    Hot 100 first, then EDM, then hip-hop.

    Returns:
        The concatenated DataFrame sorted by its ``rank`` column.
    """
    hot_100 = billboard.ChartData(HOT_100)
    top_hiphop = billboard.ChartData(TOP_HIPHOP)
    top_edm = billboard.ChartData(TOP_EDM)

    # All three charts are converted from their entry lists; the hip-hop
    # chart used to be passed as the ChartData object itself.
    top_100_df = get_song_df(hot_100.entries)
    top_edm_df = get_song_df(top_edm.entries)
    top_hiphop_df = get_song_df(top_hiphop.entries)

    # `Series.get_values()` no longer exists in pandas >= 1.0; `isin` builds
    # the same boolean masks without the removed accessor.
    top_edm_df = top_edm_df[
        ~top_edm_df.full_title.isin(top_100_df.full_title)]
    top_hiphop_df = top_hiphop_df[
        ~top_hiphop_df.full_title.isin(top_100_df.full_title)]
    # Hip-hop is checked against the already-filtered EDM rows.
    top_hiphop_df = top_hiphop_df[
        ~top_hiphop_df.full_title.isin(top_edm_df.full_title)]

    concat_billboard_charts = pd.concat(
        [top_100_df, top_edm_df, top_hiphop_df]).sort_values(by='rank')
    return concat_billboard_charts
Beispiel #6
0
def get_longest_no_one():
    """Print the number-one entry with the highest ``weeks`` value seen.

    Walks the Hot 100 backwards from the current chart while a
    ``previousDate`` exists, looking only at the first entry of each chart.
    A chart whose first entry has the same title as the chart examined just
    before it is skipped.

    NOTE(review): the oldest chart reached (the one without a
    ``previousDate``) is fetched but never examined, as in the original.
    """
    chart = billboard.ChartData('hot-100')
    current = chart[0]
    # None instead of "": the old code compared against the bound method
    # `"".title`, which only worked by accident.
    prev_title = None
    top = 0
    current_top = ""

    while chart.previousDate:
        if current.title != prev_title and current.weeks > top:
            top = current.weeks
            current_top = current

        # Every branch of the original advanced in exactly this way.
        prev_title = current.title
        chart = billboard.ChartData('hot-100', chart.previousDate)
        current = chart[0]

    # Single-argument print call: same output on Python 2 and Python 3.
    print("%s Weeks:  %s" % (current_top, top))
Beispiel #7
0
def get_chart_entries(playlist, date):
    """Fetch the first chart on or after ``date`` that has entries.

    The requested date is advanced one day at a time until the fetched chart
    is non-empty.

    Returns:
        tuple: ``(chart, offset)`` where ``offset`` is the ``timedelta`` that
        was added to ``date`` to reach the returned chart.
    """
    one_day = timedelta(days=1)
    offset = timedelta(days=0)
    chart = billboard.ChartData(playlist, str(date))
    while not chart.entries:
        offset = offset + one_day
        chart = billboard.ChartData(playlist, str(date + offset))
    return chart, offset
    def testDateRounding(self):
        """Verify that a requested date resolves to the next published chart.

        Billboard is expected to round a date up to the nearest date on which
        a chart was published.
        """
        cases = (
            # Far earlier than any chart: expect the first Hot 100 chart.
            ("1000-10-10", "1958-08-04"),
            ("1996-07-31", "1996-08-03"),
        )
        for requested, published in cases:
            chart = billboard.ChartData("hot-100", date=requested)
            self.assertEqual(chart.date, published)
    def test_date_rounding(self):
        """Verify that a requested date resolves to the next published chart.

        Billboard is expected to round a date up to the nearest date on which
        a chart was published.
        """
        cases = (
            # Far earlier than any chart: expect the first Hot 100 chart.
            ('1000-10-10', '1958-08-04'),
            ('1996-07-30', '1996-08-03'),
        )
        for requested, published in cases:
            chart = billboard.ChartData('hot-100', date=requested)
            self.assertEqual(chart.date, published)
Beispiel #10
0
def get_charts(date):
    """Yield one dict per Hot 100 entry for every chart older than ``date``.

    The chart for ``date`` itself is only used as the starting point: the
    next-older chart is fetched before anything is yielded.

    Args:
        date: Date passed to ``billboard.ChartData`` for the starting chart.

    Yields:
        dict: A copy of the entry's attributes with a ``"date"`` key holding
        the chart date and without the ``"image"`` key.
    """
    chart = billboard.ChartData('hot-100', date=date)
    while chart.previousDate:
        chart = billboard.ChartData('hot-100', chart.previousDate)
        for entry in chart:
            # Copy first: vars() returns the entry's live __dict__, so the
            # edits below used to modify the entry object itself.
            song = dict(vars(entry))
            song["date"] = chart.date
            # pop() tolerates entries that carry no image attribute.
            song.pop("image", None)
            yield song
        print("Processed date ", chart.date)
Beispiel #11
0
def main():
    """Download Hot 100 charts back to 1958 and dump them to a JSON file.

    Expects exactly one command line argument: the output file name. The
    ``.json`` suffix is appended when missing. Charts are collected with
    ``grab_data`` starting at 2018-02-24 and written in one go with
    ``create_json``. Exits with status 1 on a wrong argument count or when
    interrupted.
    """
    # Check command line argument number for correctness. Should only be 1
    if len(argv) != 2:
        print('Expected a file name as the only argument.')
        exit(1)

    # Parse the command line argument as the name of the file to save data to
    file_to_use = argv[1]

    # Make sure the name ends with .json. A substring test would wrongly
    # accept names such as "charts.json.bak".
    if not file_to_use.endswith('.json'):
        file_to_use += '.json'

    # Grab the starting chart
    current_chart = bd.ChartData('hot-100', date='2018-02-24')

    # List of per-chart dicts
    data = []

    try:
        # Walk backwards until the next-older chart would be 1958-08-04.
        # Also stop when there is no previous chart at all: passing None as
        # the date would refetch the current chart and loop forever.
        while (current_chart.previousDate
               and current_chart.previousDate != '1958-08-04'):

            print('Grabbing chart data %s as a dict...\n' % current_chart.date)

            # Grab the data and save it into a dict
            next_chart = grab_data(current_chart)

            # append that dict to the list of dicts
            data.append(next_chart)

            # Update current chart to get next set of data
            current_chart = bd.ChartData('hot-100', current_chart.previousDate)

        # Add the final chart to the list.
        # NOTE(review): this is the chart whose previousDate is 1958-08-04,
        # so the 1958-08-04 chart itself is not collected -- confirm intent.
        print('Grabbing chart data %s as a dict...\n' % current_chart.date)
        final_chart = grab_data(current_chart)
        data.append(final_chart)

        # Dump that list of dicts into the file given
        print('Appending to .json file...\n')
        create_json(data, file_to_use)

        print('Complete!\n')

    except (EOFError, KeyboardInterrupt):
        # NOTE(review): the message mentions removing the file, but no file
        # is removed here.
        print('\nInterrupted. Removing incomplete data file and exiting...\n')
        exit(1)
Beispiel #12
0
def populate_artists_and_songs(num_of_weeks):
    '''
    Can be used for testing purposes: populates the top_music.Artist/Song
    tables with exactly the same input drawn from Billboard.com, so
    "pull_week" always succeeds.

    num_of_weeks -- how many weekly Hot 100 charts to insert, starting with
    the current chart and walking backwards.
    '''
    chart = billboard.ChartData('hot-100')
    for week in range(num_of_weeks):
        if week:
            # Step back only when another week is actually needed; the old
            # loop fetched one extra chart after the last week.
            chart = billboard.ChartData('hot-100', chart.previousDate)
        for song in chart:
            artist_name = parse_artist_name(song.artist)
            insert_artist(artist_name, "", "1", "")
            artist_id = validate_artist(artist_name)
            insert_song(song.title, artist_id, DUMMY_DATE)
Beispiel #13
0
    def testPreviousNext(self):
        """Verify that chart dates and their neighbours come from the page.

        The date, previousDate and nextDate attributes must be parsed from
        the HTML rather than computed: this pair of consecutive charts is
        not seven days apart.
        """
        later_date = "1962-01-06"
        earlier_date = "1961-12-25"

        later = billboard.ChartData("hot-100", date=later_date)
        self.assertEqual(later.date, later_date)
        self.assertEqual(later.previousDate, earlier_date)

        earlier = billboard.ChartData("hot-100", date=earlier_date)
        self.assertEqual(earlier.date, earlier_date)
        self.assertEqual(earlier.nextDate, later_date)
Beispiel #14
0
def scrapeDataForYear(year, conn, onYearDone):
    """Save Hot 100 charts walking backwards from the final date of ``year``.

    Each chart is stored with ``saveChart(chart, conn)``, after which
    ``onYearDone()`` is invoked and the loop sleeps ten seconds before
    requesting the next-older chart. The walk ends when a chart has no
    ``previousDate`` or when the value of ``getPreviousYear(year)`` appears
    in the chart's date.
    """
    chart = billboard.ChartData('hot-100',
                                date=getFinalDate(year),
                                fetch=True,
                                timeout=30)
    stopYear = getPreviousYear(year)

    while True:
        if not chart.previousDate or stopYear in chart.date:
            break
        saveChart(chart, conn)
        # Invoked once per saved chart.
        onYearDone()
        time.sleep(10)
        chart = billboard.ChartData('hot-100', chart.previousDate, timeout=45)
Beispiel #15
0
def scrape_billboard(number_of_weeks):
    """
    Collect Spotify-enriched data for unique Hot 100 tracks.

    input - number of weeks to scrape into the past
    output - list of dictionaries, one per unique track, holding the
        'artist', 'title' and 'track_id' values plus whatever the
        spotify_utils setters add
    """
    from spotify_utils import create_auth, set_track_data, set_album_data, set_artist_data, set_audio_features
    from utils import write_list_of_dictionaries_to_file
    import billboard
    try:
        import Tkinter as tkinter  # Python 2 module name
    except ImportError:
        import tkinter  # Python 3 module name

    chart = billboard.ChartData(name='hot-100')
    data = []
    # A set gives O(1) duplicate checks; many tracks stay on the chart for
    # weeks.
    seen_ids = set()
    count = 0
    sp = create_auth()

    # `<` rather than `!=` so a negative week count does not loop forever.
    while count < number_of_weeks:
        if count:
            # Step back only when another week is needed; no trailing fetch.
            chart = billboard.ChartData(name='hot-100',
                                        date=chart.previousDate)
        print("Getting data from: " + chart.date)
        for track in chart:
            track_id = track.spotifyID
            if not track_id or track_id in seen_ids:
                continue
            seen_ids.add(track_id)

            song = {
                'artist': track.artist,
                'title': track.title,
                'track_id': str(track_id),
            }
            try:
                set_track_data(song, sp)
                set_album_data(song, sp)
                set_artist_data(song, sp)
                set_audio_features(song, sp)
            except Exception:
                # `except Exception` still covers Spotify errors but lets
                # KeyboardInterrupt/SystemExit through.
                # if a spotify error occurs, refresh the token
                sp = create_auth()
                # write our data to a file just in case
                write_list_of_dictionaries_to_file(
                    data, "BillboardBackupAt" + str(count) + ".csv")
                continue

            data.append(song)
        count += 1

    # Notify me when done
    tkinter.Tk().bell()

    return data
def unique_top_tracks_generator():
    """Collect the unique ``(title, artist)`` pairs charted in each year.

    For every year from 1958 through 2020 the Hot 100 is walked backwards
    from the chart requested for December 31st for as long as the chart's
    ``previousDate`` falls in that year, sleeping ``THROTTLE_TIME`` seconds
    between requests.

    NOTE(review): charts are selected by ``previousDate`` rather than by
    their own date, exactly as before -- confirm that is intended.

    Returns:
        dict: Maps each year (int) to a list of ``(title, artist)`` tuples.
    """
    track_dict = {}
    for year in range(1958, 2021):
        year_prefix = str(year)
        seen_tracks = set()
        chart = billboard.ChartData("hot-100", date=year_prefix + "-12-31")
        # The oldest chart has no previousDate; slicing None used to raise
        # TypeError here.
        while chart.previousDate and chart.previousDate[:4] == year_prefix:
            print(chart.previousDate)
            # The set de-duplicates by itself. The old check looked for a
            # bare title among tuples and so never matched anything.
            seen_tracks.update(
                (track.title, track.artist) for track in chart)
            time.sleep(THROTTLE_TIME)
            chart = billboard.ChartData("hot-100", chart.previousDate)
        track_dict[year] = list(seen_tracks)
    return track_dict
def get_songs(songs, chart_name):
    """Fill ``songs`` with ``title -> artist`` pairs from a chart's history.

    Walks ``chart_name`` backwards from the current chart until a chart has
    no ``previousDate`` or its date starts with ``'1979'``. When fetching
    the next-older chart raises a requests error, the loop waits ten seconds
    and retries from the same chart. ``songs`` is modified in place; nothing
    is returned.
    """
    chart = billboard.ChartData(chart_name)

    while True:
        if not chart.previousDate or chart.date.startswith('1979'):
            break

        for entry in chart:
            songs[entry.title] = entry.artist

        try:
            older_chart = billboard.ChartData(chart_name, chart.previousDate)
        except requests.exceptions.RequestException:
            # Back off, then retry from the chart we already hold.
            time.sleep(10)
        else:
            chart = older_chart
            print(chart)
            time.sleep(10)
Beispiel #18
0
def extract_billboard_top_100(previousDate):
    """Append weekly Hot 100 listings back to 1990 to the global ``ll``.

    Starting from the chart at ``previousDate``, each older chart is fetched
    (two seconds apart) and appended to ``ll`` as a list of
    ``(title, artist, chart_date)`` tuples. When a fetch fails, scraping
    resumes from the last chart that was fetched successfully, for at most
    10000 attempts.

    Args:
        previousDate: Date of the chart to start walking back from.
    """
    for _attempt in range(10000):
        chart = billboard.ChartData('hot-100', previousDate)
        try:
            while chart.previousDate and chart.previousDate[:4] >= '1990':
                time.sleep(2)
                chart = billboard.ChartData('hot-100', chart.previousDate)
                print("Correct", chart.date)
                ll.append([(entry.title, entry.artist, chart.date)
                           for entry in chart])
        except Exception:
            # Remember where we got to, then retry from that chart.
            print(chart.date)
            previousDate = chart.date
        else:
            # Finished cleanly. Without this exit the retry loop started
            # over and appended the same charts again.
            return
def makefile(iniyear, endyear, filepath):
    """Write the R&B/Hip-Hop songs chart history to a text file.

    Starting from the chart requested for January 1st of ``iniyear``, each
    chart is written as a ``Date:`` header followed by one
    ``title | artist | weeks`` line per entry, pausing five seconds between
    charts. Writing stops when the next chart's year equals ``endyear`` or
    when there is no next chart.

    Args:
        iniyear: First year to export.
        endyear: Year at which to stop (exclusive); str or int.
        filepath: Destination file, overwritten if it exists.
    """
    # Compare as text so an int endyear also terminates the loop.
    end_year = str(endyear)

    # `with` closes the file even when a chart request fails midway.
    with open(filepath, 'w') as f:
        chart = billboard.ChartData('r-b-hip-hop-songs',
                                    '{}-01-01'.format(iniyear))

        # A missing nextDate means the newest chart was reached.
        while chart.nextDate and chart.nextDate.split("-")[0] != end_year:
            f.write("Date: {}\n\n".format(chart.date))
            for songs in chart:
                f.write("{} | {} | {} \n".format(songs.title, songs.artist,
                                                 songs.weeks))
            time.sleep(5)
            f.write("\n\n")
            chart = billboard.ChartData('r-b-hip-hop-songs', chart.nextDate)
Beispiel #20
0
def main():
    """Print the current Hot 100 chart date, then each entry's attributes."""
    # Full constructor call, for reference:
    # billboard.ChartData('hot-100', date=None, fetch=True, max_retries=5, timeout=25)
    hot100 = billboard.ChartData('hot-100')

    banner = "Billboard hot-100 chart from " + hot100.date + "\n---------------------------------------"
    print(banner)
    for entry in hot100:
        print(vars(entry))
def getChart(site, genre, date):
    """Return the unique lead artists on a chart for the given date.

    Args:
        site: ``'billboard'`` or ``'gaon'``; anything else prints a message
            and exits.
        genre: Chart name passed to Billboard (unused for gaon).
        date: Chart date as a ``YYYY-MM-DD`` string.

    Returns:
        numpy.ndarray: Sorted unique artist names with everything from
        ``Featuring``, `` x `` / `` X `` or `` & `` onwards removed and
        surrounding whitespace stripped.
    """
    artists = []

    print(f'{genre} - {date}')

    if site == 'billboard':
        chart = billboard.ChartData(genre, date=date)
        for song in chart:
            artists.append(song.artist)
    elif site == 'gaon':
        ranking = gaon.week(day_time=datetime.strptime(date, '%Y-%m-%d'))
        for rank in ranking:
            artists.append(rank['artist'])
    else:
        print('Site must be billboard, gaon, or oricon')
        exit()

    # Strip after cutting: the result of `map(str.strip, artists)` used to
    # be thrown away, so "Drake " and "Drake" counted as different artists.
    artists = [
        re.sub('Featuring.*| [xX] .*| & .*', '', artist).strip()
        for artist in artists
    ]

    artists = np.unique(artists)

    return artists
    def get_billboard_list(self):
        """Append the chart entries for every date to the result CSV.

        Dates come from ``self.get_dates()``. The file at
        ``self.path + self.billboard_result_path`` receives a header row when
        it does not exist yet, then one row per chart entry
        (title, artist, retrieve date, rank, weeks) for each date.
        """
        date_list = self.get_dates()
        result_path = self.path + self.billboard_result_path

        if not os.path.isfile(result_path):
            with open(result_path, 'a') as f:
                writer_top = csv.writer(f,
                                        delimiter=',',
                                        quoting=csv.QUOTE_MINIMAL)
                writer_top.writerow(
                    ['title', 'artist', 'retrieve_date', 'rank', 'weeks'])

        for d in date_list:
            print(d)
            chart = billboard.ChartData(self.chart, date=d)
            # Reopened per date so each chart is flushed and closed on its
            # own; the handles used to be left open.
            with open(result_path, 'a') as f:
                writer_top = csv.writer(f,
                                        delimiter=',',
                                        quoting=csv.QUOTE_MINIMAL)
                # Every entry is written; `range(0, chart_len - 1)` used to
                # drop the last one.
                for entry in chart.entries:
                    writer_top.writerow([
                        entry.title, entry.artist, d, entry.rank, entry.weeks
                    ])
Beispiel #23
0
def TrackList(spotifyObject):
    """Look up Spotify track IDs for the current Hot 100.

    Artist credits are simplified before searching: ``Featuring`` becomes
    ``feat``, ``&`` is removed when there is no ``Featuring``, and the
    literal ``Swae Lee Or`` is dropped. Each ``"<title> <artist>"`` query is
    printed and searched through ``spotifyObject.search``.

    Args:
        spotifyObject: Client exposing ``search(query, limit, offset, type)``.

    Returns:
        list: The ID of the first search hit for each chart entry, in chart
        order. Entries without any hit are skipped.
    """
    cleanAry = []
    chart = billboard.ChartData('hot-100')
    for song in chart:
        artist = song.artist
        if "Featuring" in artist:
            artist = artist.replace("Featuring", "feat")
            if "Swae" in artist:
                print("---------------------------")
                artist = artist.replace("Swae Lee Or", "")
        elif "&" in artist:
            artist = artist.replace("&", "")
        elif "Swae" in artist:
            artist = artist.replace("Swae Lee Or", "")
        cleanAry.append((song.title, artist))

    trackAry = []
    for title, artist in cleanAry:
        query = title + " " + artist
        print(query)
        searchResults = spotifyObject.search(query, 1, 0, "track")

        items = searchResults['tracks']['items']
        if not items:
            # No match on Spotify: skip instead of raising IndexError.
            print("No Spotify match for: " + query)
            continue

        trackAry.append(items[0]['id'])
    return trackAry
Beispiel #24
0
def test_datetime_date():
    """Verify that ChartData accepts a ``datetime.date`` as its date."""
    requested = datetime.date(2016, 7, 9)
    chart = billboard.ChartData('hot-100', requested)
    entry_count = len(chart)
    assert entry_count == 100
    assert chart.date == '2016-07-09'
Beispiel #25
0
def music_api():
    """Render the current Hot 100 as an HTML ordered list.

    Returns:
        Markup: ``<ol>`` markup with one ``<li>title - artist</li>`` item,
        followed by two ``<br>`` tags, per chart entry.
    """
    chart = billboard.ChartData('hot-100')
    # Markup.format escapes its arguments, so titles and artist names coming
    # from the scraped page (e.g. containing "&" or "<") cannot inject or
    # break markup.
    item_template = Markup("<li>{} - {}</li><br><br>")
    items = [item_template.format(song.title, song.artist) for song in chart]
    return Markup("<ol>") + Markup("").join(items) + Markup("</ol>")
Beispiel #26
0
 def testDatetimeDate(self):
     """Verify that ChartData accepts a ``datetime.date`` as its date."""
     requested = datetime.date(2016, 7, 9)
     chart = billboard.ChartData("hot-100", requested)
     entry_count = len(chart)
     self.assertEqual(entry_count, 100)
     self.assertEqual(chart.date, "2016-07-09")
Beispiel #27
0
    def get_data(self):
        """Fetch every configured chart for each day of the date range.

        Reads ``self.start_date`` and ``self.end_date`` (``YYYY-MM-DD``) and
        ``self.chart_name_list``. Exits the process when the start date is
        after the end date or the range exceeds 30 days. For each chart and
        each day from the start date up to, but not including, the end date,
        the chart is fetched and written out as a JSON file and a CSV file
        whose names carry the chart name, the range and a timestamp.
        """
        date_format = '%Y-%m-%d'
        first_day = datetime.datetime.strptime(self.start_date, date_format)
        last_day = datetime.datetime.strptime(self.end_date, date_format)

        if first_day > last_day:
            print("Start date should not be greater than end date")
            sys.exit()

        span_days = (last_day - first_day).days
        if span_days > 30:
            print("Days should not exceed more than 30")
            sys.exit()

        for chart_name in self.chart_name_list:
            for offset in range(span_days):
                day = first_day + datetime.timedelta(days=offset)
                chart_data = billboard.ChartData(chart_name,
                                                 day.strftime(date_format))

                # JSON export; the timestamp keeps file names unique.
                parsed_data = self.parse_data(chart_data, chart_name)
                json_name = ('/var/www/msg/api/billboard-charts/source/' +
                             chart_name + '_' + self.start_date + '_' +
                             self.end_date + '_' + str(time.time()) + '.json')
                self.write_to_json_file(json_name, parsed_data)

                # CSV export of the same chart.
                parsed_data_csv = self.csv_data(chart_data, chart_name)
                csv_name = ('/var/www/msg/api/billboard-charts/source/' +
                            chart_name + '_' + self.start_date + '_' +
                            self.end_date + '_' + str(time.time()) + '.csv')
                self.write_to_csv_file(csv_name, chart_name, parsed_data_csv)
 def get_lastfm_input_artist(self, artist_name: str) -> Optional[dict]:
     """Return the Artist 100 entry for ``artist_name`` as a dict.

     The dict has the keys artist, image, isNew, lastPos, peakPos, rank,
     title and weeks. When no entry has exactly this artist name, every
     value is ``"-"``. Any exception is printed and ``None`` is returned.
     """
     fields = ("artist", "image", "isNew", "lastPos", "peakPos", "rank",
               "title", "weeks")
     try:
         entries = billboard.ChartData('artist-100').entries
         matches: list = [
             entry for entry in entries if entry.artist == artist_name
         ]
         if not matches:
             return {field: "-" for field in fields}
         # Only the first matching entry is reported.
         first_match = matches[0]
         return {field: getattr(first_match, field) for field in fields}
     except Exception as e:
         print(type(e))
         print(e.args)
         print(e)
         return None
def getchart(name, date):
    ''' Fetch chart ``name`` for ``date`` and record its songs in CORPUS.

    ``date`` must offer a ``.date()`` method; its string form is used both
    for the request and as the stored value. Each song is stored under the
    key ``title + SEPARATOR + artist``.
    '''
    day = str(date.date())
    fetched = billboard.ChartData(name, date=day, fetch=True)
    logger.info("Fetched {} songs".format(len(fetched)))
    for entry in fetched:
        corpus_key = entry.title + SEPARATOR + entry.artist
        CORPUS.update({corpus_key: day})
def create_playlist_from_chart(chart_id, chart_name, num_songs_phrase,
                               web_url):
    """Create a playlist holding the current Billboard chart ``chart_id``.

    The playlist title combines ``chart_name`` with the chart date formatted
    like "January 05, 2019". When a playlist with that title already exists,
    a message is printed and nothing is created. Otherwise a new playlist is
    created and filled with the chart's entries.
    """
    # Get the songs from the Billboard web page
    chart = billboard.ChartData(chart_id)
    parsed_date = datetime.strptime(chart.date, '%Y-%m-%d')
    chart_date = parsed_date.strftime("%B %d, %Y")

    pl_title = "{0} - {1}".format(chart_name, chart_date)
    pl_description = "".join([
        "This playlist contains the ",
        num_songs_phrase,
        "songs ",
        "in the Billboard ",
        chart_name,
        " Songs chart for the ",
        "week of ",
        chart_date,
        ".  ",
        web_url,
    ])

    # Only create the playlist when no playlist has this title yet.
    if not playlist_exists_with_title(pl_title):
        new_playlist_id = create_new_playlist(pl_title, pl_description)
        add_chart_entries_to_playlist(new_playlist_id, chart.entries)
        return

    print("Playlist already exists with title '" + pl_title + "'. "
          "Delete it manually and re-run the script to recreate it.")