def mark_verses(song_list_path):
    """Run the verse-marking pass over every song named in a list file.

    The per-song callback is produced by ``get_process_song`` from the
    entries of ``final_artist_list.txt``. After all songs have been handed to
    ``loop_and_process``, ``remove_duplicates_from_list_file`` is run for
    ``skipped_artists_file`` under ``marked_verses_dir``.

    Args:
        song_list_path: Path given to ``read_list_from_file`` to obtain the
            songs to process.
    """
    songs = read_list_from_file(song_list_path)
    known_artists = read_list_from_file('final_artist_list.txt')

    def song_key(entry):
        # Entries of the song list are already the names used for lookup.
        return entry

    loop_and_process(
        songs,
        get_process_song(known_artists),
        "Song",
        song_key,
        marked_verses_dir,
    )
    remove_duplicates_from_list_file(marked_verses_dir, skipped_artists_file)
def raw_songs_to_verse_split_songs(song_list_path, dir_prefix):
    """Split each cleaned song's lyrics into bracket-tagged verses.

    Song names are read one per line from ``song_list_path``. For every name
    the JSON file ``<cleaned_songs_dir>/<name_to_file_name(name)>.json`` is
    loaded and its ``lyrics`` text is cut at each ``[...]`` tag: the tag
    (brackets included) becomes a verse's ``metadata`` and the text up to the
    next tag becomes that verse's ``lyrics``. Text before the first tag
    belongs to no verse and is dropped. A tag with no closing ``]`` absorbs
    the rest of the text.

    Args:
        song_list_path: Text file containing one song name per line.
        dir_prefix: Passed through as the last argument of
            ``loop_and_process`` (presumably the output directory -- confirm
            against that helper).
    """
    with open(song_list_path) as listfile:
        names = [line.strip() for line in listfile]

    def split_verses(text):
        """Return the list of ``{'metadata', 'lyrics'}`` dicts found in *text*."""
        found = []
        tag_chars = []
        body_chars = []
        inside_tag = False

        def flush():
            # Only a verse that actually has a tag is recorded.
            tag = ''.join(tag_chars)
            if tag.strip():
                found.append({'metadata': tag, 'lyrics': ''.join(body_chars)})

        for ch in text:
            if inside_tag:
                # Everything up to and including the closing bracket is tag text.
                tag_chars.append(ch)
                if ch == ']':
                    inside_tag = False
            elif ch == '[':
                # A new tag closes the verse collected so far.
                flush()
                tag_chars = [ch]
                body_chars = []
                inside_tag = True
            else:
                body_chars.append(ch)

        flush()
        return found

    def process_song(song, bar):
        source_path = "{}/{}.json".format(cleaned_songs_dir, name_to_file_name(song))
        with open(source_path) as song_file:
            cleaned = json.load(song_file)
        verses = split_verses(cleaned['lyrics'])
        return {
            'title': cleaned['title'],
            'verses': verses,
            'artist': cleaned['artist'],
            'featured_artists': cleaned['featured_artists'],
        }

    def get_song_name(entry):
        return entry

    loop_and_process(names, process_song, "Song", get_song_name, dir_prefix)
def process_artist(name, bar):
    """Search for an artist and run every one of their songs through ``loop_and_process``.

    The artist is looked up with ``genius.search_artist``; each entry of the
    result's ``songs`` is converted to a plain dict (title, artist, lyrics,
    featured artist names) and handed to ``loop_and_process`` with
    ``raw_songs_dir``.

    Args:
        name: Artist name passed to ``genius.search_artist``.
        bar: Unused here; accepted because the caller supplies it.

    Returns:
        Always ``None``.
    """
    found = genius.search_artist(name)
    artist_songs = found.songs

    def song_key(track):
        # Artist and title are joined so that the key identifies both.
        return track.artist + artist_song_split_token + track.title

    def to_record(track, bar):
        featured_names = [entry['name'] for entry in track.featured_artists]
        return {
            'title': track.title,
            'artist': track.artist,
            'lyrics': track.lyrics,
            'featured_artists': featured_names,
        }

    loop_and_process(artist_songs, to_record, "Song", song_key, raw_songs_dir)
    return None
def run_bpe_on_songs(codes_file, song_list_path, out_dir):
    """Apply BPE to the lyrics of every valid verse of every listed song.

    For each song name obtained from ``song_list_path`` the JSON file
    ``<verses_with_tokens>/<name_to_file_name(name)>.json`` is loaded. Every
    verse whose ``valid`` entry is truthy has its ``lyrics`` replaced by the
    result of ``apply_bpe_to_string``; other verses are left untouched. The
    modified song dict is returned to ``loop_and_process``.

    Args:
        codes_file: Passed to ``get_bpe_object`` to build the BPE object.
        song_list_path: Path given to ``read_list_from_file``.
        out_dir: Passed through as the last argument of ``loop_and_process``.
    """
    encoder = get_bpe_object(codes_file)
    names = read_list_from_file(song_list_path)

    def process_song(song, bar):
        source_path = "{}/{}.json".format(verses_with_tokens, name_to_file_name(song))
        with open(source_path) as song_file:
            loaded = json.load(song_file)

        for verse in loaded['verses']:
            if not verse['valid']:
                continue
            verse['lyrics'] = apply_bpe_to_string(verse['lyrics'], encoder)
        return loaded

    def song_key(entry):
        return entry

    loop_and_process(names, process_song, "Song", song_key, out_dir)
def get_songs(name=None, csv=None):
    """Fetch lyrics for one artist, or for every artist listed in a file.

    The work is retried in a loop: after each pass the ``_LIST`` file under
    ``artist_lyric_dir`` is read and every artist named there is dropped from
    the pending list. The loop ends when no artist is pending.

    NOTE: an artist that never gets recorded in ``_LIST`` keeps the loop
    running; interrupt with Ctrl-C (which is no longer swallowed).

    Args:
        name: A single artist name. Ignored when ``csv`` is given.
        csv: Path of a text file with one artist name per line. Takes
            precedence over ``name``. Blank lines are skipped.

    Returns:
        None.
    """
    # The pending artists are kept in a plain list: the loop below relies on
    # truthiness, membership and removal of individual names, none of which a
    # DataFrame provides (iterating one yields column labels, not rows).
    if csv is not None:
        print("\n Getting lyrics for all artists in {}".format(csv))
        with open(csv) as openfile:
            stripped = (line.strip() for line in openfile)
            # An empty name can never be looked up, so it must not be queued.
            artists = [entry for entry in stripped if entry]
    elif name is not None:
        print("\n Getting lyrics for {}".format(name))
        artists = [name]
    else:
        print("No Input Artists")
        artists = []

    while artists:
        try:
            genius = instantiate_genius()

            def process_artist(artist_name, bar):
                # NOTE(review): search_artist may yield no result for an
                # unknown name -- confirm how loop_and_process handles the
                # resulting AttributeError.
                artist = genius.search_artist(artist_name)
                songs = artist.songs

                def process_song(song, bar):
                    return {
                        'title': song.title,
                        'artist': song.artist,
                        'lyrics': song.lyrics,
                        'featured_artists': [a['name'] for a in song.featured_artists],
                    }

                def get_song_name(song):
                    return song.artist + artist_song_split_token + song.title

                loop_and_process(songs, process_song, "Song", get_song_name, raw_songs_dir)
                return None

            def get_artist_name(artist_name):
                return artist_name

            loop_and_process(
                artists,
                process_artist,
                "Artist",
                get_artist_name,
                artist_lyric_dir,
            )
        except Exception as e:
            # Best-effort retry: report the failure type and go around again.
            # KeyboardInterrupt/SystemExit are deliberately not caught so the
            # loop can be stopped.
            print(type(e))
        finally:
            completed_artists = set(
                read_list_from_file("{}/{}".format(artist_lyric_dir, "_LIST"))
            )
            artists = [artist for artist in artists if artist not in completed_artists]
def artist_to_raw_song_files(artists_file):
    """Convert stored per-artist JSON files into individual raw song records.

    Artist names are read one per line from ``artists_file``. For each name
    the file ``<artist_lyric_dir>/<name_to_file_name(name)>`` is loaded as
    JSON and every entry of its ``songs`` list is reduced to a dict with
    ``title``, ``artist`` (the primary artist's name) and ``lyrics``, then
    handed to ``loop_and_process`` with ``raw_songs_dir``.

    Args:
        artists_file: Text file containing one artist name per line.
    """
    with open(artists_file) as openfile:
        artists = [line.strip() for line in openfile]

    # ``bar`` is accepted (and ignored) so this callback has the same
    # (item, bar) shape as every other callback given to loop_and_process;
    # the default keeps a one-argument call working as well.
    def process_song(song, bar=None):
        return {
            'title': song['title'],
            'artist': song['primary_artist']['name'],
            'lyrics': song['lyrics'],
        }

    def get_song_name(song):
        return song['title']

    for artist_name in tqdm(artists):
        artist_path = "{}/{}".format(artist_lyric_dir, name_to_file_name(artist_name))
        with open(artist_path) as jsonfile:
            artist = json.load(jsonfile)
        loop_and_process(artist["songs"], process_song, "Song", get_song_name, raw_songs_dir)
def clean_song(song_list_path, out_dir):
    """Run ``process_song`` over every song named in a list file.

    ``process_song`` is not defined inside this function; it is resolved from
    the enclosing scope at call time.

    Args:
        song_list_path: Path given to ``read_list_from_file``.
        out_dir: Passed through as the last argument of ``loop_and_process``.
    """
    def song_key(entry):
        # List entries are used as their own names.
        return entry

    names = read_list_from_file(song_list_path)
    loop_and_process(names, process_song, "Song", song_key, out_dir)
def verse_songs_extract_artists(song_list_path, dir_prefix):
    """Process every listed song, then de-duplicate the two artist list files.

    ``process_song`` and ``get_song_name`` are not defined inside this
    function; both are resolved from the enclosing scope at call time.

    Args:
        song_list_path: Path given to ``read_list_from_file``.
        dir_prefix: Passed through as the last argument of
            ``loop_and_process``.
    """
    names = read_list_from_file(song_list_path)
    loop_and_process(names, process_song, "Song", get_song_name, dir_prefix)

    # NOTE(review): de-duplication always targets verse_artists_dir, whatever
    # dir_prefix was passed above -- confirm the two are meant to coincide.
    for list_file in (artists_list_file, raw_artists_list_file):
        remove_duplicates_from_list_file(verse_artists_dir, list_file)
def fix_tokens_for_verses(song_list_path, out_dir):
    """Run ``process_song`` over every song named in a list file.

    ``process_song`` is not defined inside this function; it is resolved from
    the enclosing scope at call time.

    Args:
        song_list_path: Path given to ``read_list_from_file``.
        out_dir: Passed through as the last argument of ``loop_and_process``.
    """
    def song_key(entry):
        # List entries are used as their own names.
        return entry

    names = read_list_from_file(song_list_path)
    loop_and_process(names, process_song, "Song", song_key, out_dir)