def download_lyrics(artist_name):
    """Fetch every song for *artist_name*, index the lyrics, and write JSON.

    Builds two structures — a word -> indices mapping and a
    title -> lines mapping — then hands both to ``write_out_json``.
    """
    word_to_indices = {}
    title_to_lines = {}
    for title, lyrics in pylyrics3.get_artist_lyrics(artist_name).items():
        lines = split_lines(lyrics)
        title_to_lines[title] = lines
        add_word_indices_to_dict(title, lines, word_to_indices)
    write_out_json(artist_name, word_to_indices, title_to_lines)
def getlyrics():
    """Download all Blink-182 lyrics, print them, and save two files.

    Writes the full title->lyrics mapping as JSON to 'blinklyrics.txt'
    and the concatenated lyric texts to 'blinklyric.txt'.
    """
    blinklyrics = pylyrics3.get_artist_lyrics('Blink-182')
    for lyrics in blinklyrics.values():
        print(lyrics)
    with open('blinklyrics.txt', 'w') as file:
        file.write(json.dumps(blinklyrics))
    # BUG FIX: the original iterated .items() and passed (title, lyrics)
    # tuples straight to f.write(), which raises TypeError. Write the
    # lyric strings themselves.
    with open("blinklyric.txt", 'w') as f:
        for value in blinklyrics.values():
            f.write(value)
def save_manowar_lyrics():
    """Scrape Manowar's catalogue and store the English-only set as JSON."""
    all_lyrics = pylyrics3.get_artist_lyrics('manowar')
    # filter non-english: titles containing 'Version' are dropped
    filtered = {}
    for song, lyrics in all_lyrics.items():
        if 'Version' not in song:
            filtered[song] = lyrics
    with path('data', 'manowar.json') as file_path, \
            open(file_path, 'w', encoding='utf-8') as file:
        json.dump(filtered, file)
def get_lyrics(artist_name):
    """Return ``{album: {track: lowercase lyrics}}`` for *artist_name*.

    Tracks whose lyric text is falsy (e.g. missing/empty) are skipped,
    matching the original behavior. Uses a nested dict comprehension in
    place of the original append-style loops.
    """
    artist_lyrics = pylyrics3.get_artist_lyrics(artist_name, albums=True)
    return {
        album: {
            track: text.lower()
            for track, text in tracks.items()
            if text
        }
        for album, tracks in artist_lyrics.items()
    }
def get_lyrics(artist):
    """Download all lyrics for *artist* into ``database/<sanitized name>``.

    Skips artists whose database file already exists (delete the file to
    force a rebuild). Errors are printed, never raised.
    """
    try:
        filename = artist.replace(" ", "_")  # No spaces in filename
        # Prevent making another directory for names such as AC/DC.
        # (replace() is a no-op when '/' is absent, so no guard needed.)
        filename = filename.replace("/", "-")
        if Path('database/' + filename).exists():
            # Prevent double work (remove a file if you want to rebuild its DB)
            print("Database for " + filename + " is already there.")
            return
        # BUG FIX: fetch *before* opening the file. The original opened the
        # file first, so a None/failed scrape left an empty database file
        # behind that blocked all future rebuild attempts.
        lyrics = pylyrics3.get_artist_lyrics(artist)
        if not lyrics:
            print(artist + " threw an exception")
            print("no lyrics returned")
            return
        print("Writing database for " + filename)
        with open('database/' + filename, 'w') as file:
            for song in lyrics:
                file.write(lyrics[song])  # Write all lyrics to file
    except Exception as error:
        # Best-effort: report and continue with the next artist.
        print(artist + " threw an exception")
        print(error)
def scrape_lyrics(artist, genre):
    """TODO: Wrapper for pylyrics3 query

    :artist: artist name (string)
    :genre: genre (string)
    :returns: returns a list of lists with artist name, genre, song name
        and lyrics.
    """
    artist_songs = pyl.get_artist_lyrics(artist)
    total_songs = len(artist_songs)
    rows = []
    # enumerate() replaces the original hand-rolled counter; the index is
    # printed before incrementing, so counting still starts at 0.
    for index, song in enumerate(tqdm(artist_songs)):
        print("Scraping {} - {} out of {}".format(song, index, total_songs))
        rows.append([artist, genre, song, artist_songs[song]])
    return rows
def download_artist_lyrics(argv):
    """Fetch every song by artist *argv* and dump the mapping as JSON
    to ``../data/<argv>.txt``. Also creates a directory named after the
    artist, as the original did.
    """
    # exist_ok prevents the crash the original os.mkdir() had on re-runs.
    os.makedirs(argv, exist_ok=True)
    lyrics = pyl.get_artist_lyrics(argv)
    # Context manager closes/flushes the handle the original leaked.
    with open('../data/' + argv + '.txt', 'w') as f:
        json.dump(lyrics, f)
def test_compound_artist(self):
    '''Test that collaborative songs ("Artist1 & Artist2") are included
    '''
    tracks = pylyrics3.get_artist_lyrics('bon iver')
    # typo on lyricwiki :( — accept either spelling
    found = 'Roslyn' in tracks.keys() or 'Rosyln' in tracks.keys()
    self.assertTrue(found)
def test_singles_and_soundtracks(self):
    '''Certain artists were throwing a TypeError on a None response, in
    addition to not including singles and soundtrack sections.'''
    tracks = pylyrics3.get_artist_lyrics('troye sivan')
    self.assertTrue('Strawberries & Cigarettes' in tracks)
# Build an output table that pairs each (Artist, Song) row from the existing
# `data` frame with the year and lyrics scraped via pylyrics3.
newData = pd.DataFrame(columns=[
    'Artist', 'Genre', 'Band Popularity', 'Song', 'Year', 'Song Popularity',
    'Lyrics', 'pyYear', 'pyLyrics'
])
numMatched = 0  # artists for which pylyrics3 returned a song dict
for idx, artist in enumerate(artists):
    # if idx > 50:
    #     break
    print("Getting data for %s, artist %d of %d: %f s" %
          (artist, idx, len(artists), time.time() - start))
    # All rows of `data` belonging to this artist.
    artistSongs = data.loc[data['Artist'] == artist]
    # Normalize the artist name for the lyrics lookup.
    artist_clean = artist.strip().lower().replace(',', '')
    try:
        songdict = pylyrics3.get_artist_lyrics(artist_clean, albums=True)
        if songdict != None:
            numMatched += 1
            # NOTE(review): generateSongList is defined elsewhere; presumably
            # it flattens the album dict into parallel lists of song titles,
            # years, and lyrics — confirm against its definition.
            pyArtistSongs, pyArtistYears, lyrics = generateSongList(songdict)
            pyYear = []
            pyLyrics = []
            for idx, song in enumerate(artistSongs['Song']):
                # Strip punctuation/whitespace so titles compare loosely.
                songLower = re.sub('[\n?()\"\'-,.#$%~@^&*_+={};:<>]', '', song)
                songLower = songLower.lower()
                if songLower in pyArtistSongs:
                    pyYear.append(
                        pyArtistYears[pyArtistSongs.index(songLower)])
                    pyLyrics.append(lyrics[pyArtistSongs.index(songLower)])
                else:
                    # NOTE(review): source is truncated here in this chunk —
                    # the else-branch body is not visible.
def test_album(self):
    '''PyLyrics3 parses albums with tracks'''
    albums = pylyrics3.get_artist_lyrics('Bon Iver', albums=True)
    album_key = '22, a Million (2016)'
    self.assertTrue(album_key in albums)
    self.assertTrue(len(albums[album_key]) > 2)
import os

import pylyrics3 as pl

# Directory that receives one text file per scraped song.
directory = "./lyrics/"
if not os.path.exists(directory):
    os.makedirs(directory)

search = pl.get_artist_lyrics("The wonder years")
for songname in search:
    lyrics = search[songname]
    # NOTE(review): a song title containing an os path separator (e.g. '/')
    # would break this path — confirm upstream titles are safe.
    # Context manager closes the handle even if a write raises
    # (the original used bare open()/close()).
    with open(directory + songname + ".txt", "w") as out:
        out.write(songname + "\n\n")
        out.write("-------------------------------\n\n")
        out.write(lyrics)
# NOTE(review): this chunk is truncated at the start — the line below is the
# tail of a generator expression inside a filename-sanitizing helper
# (presumably removeDisallowedFilenameChars) whose header is not visible.
if c in str(validFilenameChars))

# Read the artist roster, one name per line.
artist_list = "../data/hiphop_artists.txt"
with open(artist_list, "r") as f:
    data = f.readlines()
for line in data:
    artists.append(line.strip())
print(artists)

lyrics = {}  # sanitized artist name -> {song: lyrics}
i = 0
for artist in artists:
    print(artist)
    dictionary = pylyrics3.get_artist_lyrics(artist)
    # Sanitize for filesystem use before keying/saving.
    artist = removeDisallowedFilenameChars(artist)
    if dictionary is not None:
        print("==========================")
        lyrics[artist] = dictionary
        j = 0
        for song in dictionary:
            song_file = removeDisallowedFilenameChars(song)
            print(song)
            # One text file per song under ../data/hiphopdata<artist>/.
            filename = "../data/hiphopdata" + slugify(artist) + "/" + slugify(
                song_file) + ".txt"
            if not os.path.exists(os.path.dirname(filename)):
                try:
                    os.makedirs(os.path.dirname(filename))
                    # NOTE(review): source is truncated here in this chunk —
                    # the except clause of this try is not visible.
def test_get_artist(self):
    '''PyLyrics3 can grab all tracks by an artist + odd capitalization'''
    tracks = pylyrics3.get_artist_lyrics('lORDE')
    self.assertTrue('Tennis Court' in tracks)
    self.assertTrue('you think that' in tracks['Tennis Court'])
import pylyrics3
import string

# Artist whose complete catalogue is scraped at import time.
artist = 'bon iver'
file = artist.replace(" ", "_")                 # raw lyrics dump
file2 = artist.replace(" ", "_") + "_stripped"  # deduped, punctuation-free
lyrics = pylyrics3.get_artist_lyrics(artist)


def scrape(outputfile):
    """Write every scraped lyric body, concatenated, to *outputfile*."""
    with open(outputfile, 'w') as db:
        for title in lyrics:
            db.write(lyrics[title])


# remove duplicate lines and punctuation
def remove_dupes(inputfile, outputfile):
    """Append unique lines of *inputfile* to *outputfile* minus punctuation.

    Duplicate detection happens on the raw line, before punctuation is
    removed — matching the original behavior.
    """
    lines_seen = set()  # holds lines already seen
    strip_punct = dict.fromkeys(map(ord, string.punctuation))
    # BUG FIX: context managers close both handles; the original never
    # closed the input file it opened inline in the for-statement.
    with open(outputfile, "a") as outfile, open(inputfile, "r") as infile:
        for line in infile:
            if line not in lines_seen:  # not a duplicate
                outfile.write(line.translate(strip_punct))
                lines_seen.add(line)


scrape(file)
remove_dupes(file, file2)