예제 #1
0
def download_lyrics(artist_name):
    """Fetch all songs by ``artist_name`` and hand an index of them on.

    Each song's lyrics are split into lines with ``split_lines``. The lines
    are stored per title and also passed to ``add_word_indices_to_dict``
    together with the shared ``word_to_indices`` mapping. Both mappings are
    finally given to ``write_out_json`` along with the artist name.
    """
    title_to_lines = {}
    word_to_indices = {}

    catalogue = pylyrics3.get_artist_lyrics(artist_name)
    for song_title, raw_text in catalogue.items():
        song_lines = split_lines(raw_text)
        title_to_lines[song_title] = song_lines
        add_word_indices_to_dict(song_title, song_lines, word_to_indices)

    write_out_json(artist_name, word_to_indices, title_to_lines)
예제 #2
0
def getlyrics():
    """Download all Blink-182 lyrics, print them, and save them to disk.

    Two files are written in the current directory: ``blinklyrics.txt``
    receives the whole title -> lyrics mapping as JSON, and
    ``blinklyric.txt`` receives the plain lyrics text, one song directly
    after another.
    """
    blinklyrics = pylyrics3.get_artist_lyrics('Blink-182')
    for lyrics in blinklyrics.values():
        print(lyrics)

    with open('blinklyrics.txt', 'w') as file:
        json.dump(blinklyrics, file)
    # ``dict.items()`` yields (title, lyrics) tuples, which ``write`` rejects
    # with a TypeError; write the lyrics strings themselves instead.
    with open("blinklyric.txt", 'w', encoding='utf-8') as f:
        for lyrics in blinklyrics.values():
            if lyrics:  # skip songs whose lyrics are missing (None or empty)
                f.write(lyrics)
def save_manowar_lyrics():
    """Download Manowar's lyrics and dump them as JSON.

    Songs whose title contains ``'Version'`` are left out. The result is
    written, UTF-8 encoded, to the location yielded by
    ``path('data', 'manowar.json')``.
    """
    all_songs = pylyrics3.get_artist_lyrics('manowar')

    # Titles containing 'Version' are dropped; the original author used this
    # as the filter for non-English songs.
    kept = {}
    for title, text in all_songs.items():
        if 'Version' in title:
            continue
        kept[title] = text

    with path('data', 'manowar.json') as target, \
            open(target, 'w', encoding='utf-8') as out:
        json.dump(kept, out)
예제 #4
0
def get_lyrics(artist_name):  # Gets raw lyrics in lowercase.
    """Return the lyrics of ``artist_name`` grouped by album, lower-cased.

    :returns: dict mapping each album key to a dict of track key ->
        lower-cased lyrics. Tracks with falsy lyrics are omitted; their
        album entry is kept even if it ends up empty.
    """
    by_album = pylyrics3.get_artist_lyrics(artist_name, albums=True)
    return {
        album: {
            track: text.lower()
            for track, text in tracks.items() if text
        }
        for album, tracks in by_album.items()
    }
예제 #5
0
def get_lyrics(artist):
    """Write all lyrics by ``artist`` to ``database/<artist>``, at most once.

    The file name is the artist name with spaces replaced by underscores and
    "/" replaced by "-". If that file already exists nothing is downloaded.
    Any exception is caught and reported on stdout rather than propagated.

    :param artist: artist name (string) passed to pylyrics3.
    """
    try:
        # No spaces in the filename; "/" would be read as a directory
        # separator for names such as AC/DC.
        filename = artist.replace(" ", "_").replace("/", "-")
        target = Path('database/' + filename)
        if target.exists():
            # Prevent double work (remove a file if you want to rebuild its DB)
            print("Database for " + filename + " is already there.")
            return

        print("Writing database for " + filename)
        # Download BEFORE creating the file: if the download fails, no empty
        # file is left behind to be mistaken for a finished database later.
        lyrics = pylyrics3.get_artist_lyrics(artist)
        if not lyrics:
            print("No lyrics found for " + artist)
            return

        with open(target, 'w') as file:
            for text in lyrics.values():
                if text:  # skip songs whose lyrics are missing
                    file.write(text)  # Write all lyrics to file
    except Exception as error:
        print(artist + " threw an exception")
        print(error)
예제 #6
0
def scrape_lyrics(artist, genre):
    """Collect every song of an artist via pylyrics3, tagged with a genre.

    :artist: artist name (string)
    :genre: genre (string)
    :returns: a list of ``[artist, genre, song name, lyrics]`` lists, one
        per song returned for the artist.

    """
    songs = pyl.get_artist_lyrics(artist)
    song_count = len(songs)
    rows = []
    # The progress message counts from 0, matching the position in ``songs``.
    for position, title in enumerate(tqdm(songs)):
        print("Scraping {} - {} out of {}".format(title, position,
                                                  song_count))
        rows.append([artist, genre, title, songs[title]])
    return rows
예제 #7
0
def download_artist_lyrics(argv):
    """Download an artist's lyrics and store them as JSON.

    :param argv: artist name (string). It is used as the name of a directory
        created in the current working directory, as the pylyrics3 query,
        and as the stem of the output file ``../data/<argv>.txt``.
    :raises FileExistsError: if a directory named ``argv`` already exists
        (raised by ``os.mkdir``).
    """
    os.mkdir(argv)
    lyrics = pyl.get_artist_lyrics(argv)
    # "with" guarantees the file is flushed and closed, even if dumping fails.
    with open('../data/' + argv + '.txt', 'w') as f:
        json.dump(lyrics, f)
예제 #8
0
 def test_compound_artist(self):
     '''Collaborative songs ("Artist1 & Artist2") appear in the results.'''
     lyrics = pylyrics3.get_artist_lyrics('bon iver')
     titles = lyrics.keys()
     # typo on lyricwiki :( -- accept either spelling of the title
     self.assertTrue('Roslyn' in titles or 'Rosyln' in titles)
예제 #9
0
 def test_singles_and_soundtracks(self):
     '''Singles and soundtrack sections are included in the results.

     Certain artists used to throw a TypeError on a None response, in
     addition to missing these sections.'''
     songs = pylyrics3.get_artist_lyrics('troye sivan')
     has_single = 'Strawberries & Cigarettes' in songs
     self.assertTrue(has_single)
예제 #10
0
newData = pd.DataFrame(columns=[
    'Artist', 'Genre', 'Band Popularity', 'Song', 'Year', 'Song Popularity',
    'Lyrics', 'pyYear', 'pyLyrics'
])

numMatched = 0
for idx, artist in enumerate(artists):
    #    if idx > 50:
    #        break
    print("Getting data for %s, artist %d of %d: %f s" %
          (artist, idx, len(artists), time.time() - start))
    artistSongs = data.loc[data['Artist'] == artist]
    artist_clean = artist.strip().lower().replace(',', '')

    try:
        songdict = pylyrics3.get_artist_lyrics(artist_clean, albums=True)
        if songdict != None:
            numMatched += 1
            pyArtistSongs, pyArtistYears, lyrics = generateSongList(songdict)

            pyYear = []
            pyLyrics = []
            for idx, song in enumerate(artistSongs['Song']):
                songLower = re.sub('[\n?()\"\'-,.#$%~@^&*_+={};:<>]', '', song)
                songLower = songLower.lower()

                if songLower in pyArtistSongs:
                    pyYear.append(
                        pyArtistYears[pyArtistSongs.index(songLower)])
                    pyLyrics.append(lyrics[pyArtistSongs.index(songLower)])
                else:
예제 #11
0
 def test_album(self):
     '''With albums=True the result is keyed by album, each holding tracks.'''
     album_title = '22, a Million (2016)'
     discography = pylyrics3.get_artist_lyrics('Bon Iver', albums=True)
     self.assertTrue(album_title in discography)
     track_count = len(discography[album_title])
     self.assertTrue(track_count > 2)
예제 #12
0
import os
import pylyrics3 as pl

# Directory that receives one text file per song.
directory = "./lyrics/"
# exist_ok avoids the race between a separate existence check and creation.
os.makedirs(directory, exist_ok=True)

search = pl.get_artist_lyrics("The wonder years")

for songname, lyrics in search.items():
    if lyrics is None:
        # No lyrics text came back for this song; there is nothing to write.
        continue
    # A "/" in a title would otherwise be read as a directory separator.
    safe_name = songname.replace("/", "-")
    # "with" closes the file even if one of the writes fails.
    with open(directory + safe_name + ".txt", "w") as file:
        file.write(songname + "\n\n")
        file.write("-------------------------------\n\n")
        file.write(lyrics)
                   if c in str(validFilenameChars))


artist_list = "../data/hiphop_artists.txt"
with open(artist_list, "r") as f:
    data = f.readlines()

    for line in data:
        artists.append(line.strip())
    print(artists)

lyrics = {}
i = 0
for artist in artists:
    print(artist)
    dictionary = pylyrics3.get_artist_lyrics(artist)
    artist = removeDisallowedFilenameChars(artist)
    if dictionary is not None:
        print("==========================")

        lyrics[artist] = dictionary
        j = 0
        for song in dictionary:
            song_file = removeDisallowedFilenameChars(song)
            print(song)

            filename = "../data/hiphopdata" + slugify(artist) + "/" + slugify(
                song_file) + ".txt"
            if not os.path.exists(os.path.dirname(filename)):
                try:
                    os.makedirs(os.path.dirname(filename))
 def test_album(self):
     '''With albums=True the result is keyed by album, each holding tracks.'''
     album_title = '22, a Million (2016)'
     discography = pylyrics3.get_artist_lyrics('Bon Iver', albums=True)
     self.assertTrue(album_title in discography)
     track_count = len(discography[album_title])
     self.assertTrue(track_count > 2)
예제 #15
0
 def test_get_artist(self):
     '''All tracks by an artist are fetched, even with odd capitalization.'''
     tracks = pylyrics3.get_artist_lyrics('lORDE')
     self.assertTrue('Tennis Court' in tracks)
     tennis_court = tracks['Tennis Court']
     self.assertTrue('you think that' in tennis_court)
 def test_get_artist(self):
     '''All tracks by an artist are fetched, even with odd capitalization.'''
     tracks = pylyrics3.get_artist_lyrics('lORDE')
     self.assertTrue('Tennis Court' in tracks)
     tennis_court = tracks['Tennis Court']
     self.assertTrue('you think that' in tennis_court)
예제 #17
0
import pylyrics3
import string

# Artist whose lyrics are downloaded; also the basis of both file names.
artist = 'bon iver'
# Raw dump file: the artist name with spaces turned into underscores.
file = "_".join(artist.split(" "))
# Second output file: same stem with a "_stripped" suffix.
file2 = file + "_stripped"
lyrics = pylyrics3.get_artist_lyrics(artist)


def scrape(outputfile):
    """Write the text of every song in the module-level ``lyrics`` mapping.

    ``outputfile`` is overwritten; the lyrics are written back to back in
    the mapping's iteration order, with nothing added between songs.
    """
    with open(outputfile, 'w') as out:
        for text in lyrics.values():
            out.write(text)


# remove duplicate lines and punctuation


def remove_dupes(inputfile, outputfile):
    """Append the unique lines of ``inputfile`` to ``outputfile``, minus punctuation.

    A line counts as a duplicate when the exact same raw line (before
    punctuation is removed) was already seen earlier in ``inputfile``. Only
    the first occurrence is written. Every character in
    ``string.punctuation`` is deleted from the written lines.

    :param inputfile: path of the text file to read.
    :param outputfile: path of the file to append to (opened in "a" mode, so
        existing content is kept).
    """
    strip_punctuation = str.maketrans('', '', string.punctuation)
    lines_seen = set()  # holds raw lines already written
    # "with" closes both files even if reading or writing fails; the input
    # file is opened first so a missing input does not create the output.
    with open(inputfile, "r") as infile, open(outputfile, "a") as outfile:
        for line in infile:
            if line not in lines_seen:  # not a duplicate
                outfile.write(line.translate(strip_punctuation))
                lines_seen.add(line)


# Dump the downloaded lyrics to ``file``, then append a de-duplicated,
# punctuation-free copy of that dump to ``file2``.
scrape(file)
remove_dupes(file, file2)