Ejemplo n.º 1
0
def mark_verses(song_list_path):
    """Run the verse-marking processor over every song named in a list file.

    The per-song processor is obtained from ``get_process_song`` using the
    artist list read from ``final_artist_list.txt``.  Results are handed to
    ``loop_and_process`` with ``marked_verses_dir`` as the target directory,
    after which the ``skipped_artists_file`` list in that directory is
    de-duplicated.

    Args:
        song_list_path: Path of the list file naming the songs to process.
    """
    def song_name(entry):
        # List entries already are the song names.
        return entry

    songs = read_list_from_file(song_list_path)
    known_artists = read_list_from_file('final_artist_list.txt')
    verse_marker = get_process_song(known_artists)

    loop_and_process(songs, verse_marker, "Song", song_name, marked_verses_dir)
    remove_duplicates_from_list_file(marked_verses_dir, skipped_artists_file)
def split_lyrics_into_verses(lyrics):
    """Split a lyrics string into its bracket-headed sections.

    A section starts at a ``[``; its header ("metadata") runs up to and
    including the next ``]``, or to the end of the text when the bracket is
    never closed.  Everything between that header and the next ``[`` is the
    section's lyrics.  Text that appears before the first header is dropped.

    Args:
        lyrics: Full lyrics text of one song.

    Returns:
        A list of ``{'metadata': ..., 'lyrics': ...}`` dicts, in order of
        appearance.
    """
    verses = []
    header = ''
    body_start = 0
    end = len(lyrics)
    while True:
        open_at = lyrics.find('[', body_start)
        body_end = end if open_at == -1 else open_at
        # A header, once set, always starts with '[', so it is never blank;
        # only the text preceding the first header is skipped here.
        if header:
            verses.append({'metadata': header,
                           'lyrics': lyrics[body_start:body_end]})
        if open_at == -1:
            return verses
        # The header ends at the first ']' after its '[' (nested '[' are
        # simply part of the header text).
        close_at = lyrics.find(']', open_at)
        header_end = end if close_at == -1 else close_at + 1
        header = lyrics[open_at:header_end]
        body_start = header_end


def raw_songs_to_verse_split_songs(song_list_path, dir_prefix):
    """Convert cleaned songs into per-verse records.

    Song names are read, one per line, from ``song_list_path``.  For each
    name the matching JSON file in ``cleaned_songs_dir`` is loaded, its
    lyrics are split with ``split_lyrics_into_verses``, and the resulting
    record is handed to ``loop_and_process`` with ``dir_prefix`` as target.

    Args:
        song_list_path: Path of a text file with one song name per line.
        dir_prefix: Directory argument forwarded to ``loop_and_process``.
    """
    with open(song_list_path) as listfile:
        song_list = [line.strip() for line in listfile]

    def process_song(song, bar):
        # `bar` is part of the callback signature used with
        # loop_and_process; it is not needed here.
        song_path = "{}/{}.json".format(cleaned_songs_dir,
                                        name_to_file_name(song))
        with open(song_path) as song_file:
            song_data = json.load(song_file)
        verses = split_lyrics_into_verses(song_data['lyrics'])
        return {
            'title': song_data['title'],
            'verses': verses,
            'artist': song_data['artist'],
            'featured_artists': song_data['featured_artists']
        }

    def get_song_name(song):
        # Entries of the song list already are the song names.
        return song

    loop_and_process(song_list, process_song, "Song", get_song_name,
                     dir_prefix)
Ejemplo n.º 3
0
 def process_artist(name, bar):
     """Look up an artist and process each of the artist's songs.

     Every song is reduced to a plain dict (title, artist, lyrics, featured
     artist names) and passed through ``loop_and_process`` with
     ``raw_songs_dir`` as target.

     Args:
         name: Artist name to search for with ``genius.search_artist``.
         bar: Second callback argument supplied by the caller; unused here.

     Returns:
         Always ``None``.
     """
     artist = genius.search_artist(name)
     if artist is None:
         # NOTE(review): presumably search_artist returns None when nothing
         # matches (lyricsgenius behaviour) - confirm. Without this guard the
         # next line fails with an opaque AttributeError on NoneType.
         print("No artist found for {}".format(name))
         return None

     def process_song(song, bar):
         return {
             'title': song.title,
             'artist': song.artist,
             'lyrics': song.lyrics,
             'featured_artists': [a['name'] for a in song.featured_artists]
         }

     def get_song_name(song):
         return song.artist + artist_song_split_token + song.title

     loop_and_process(artist.songs, process_song, "Song", get_song_name,
                      raw_songs_dir)
     return None
Ejemplo n.º 4
0
def run_bpe_on_songs(codes_file, song_list_path, out_dir):
    """Apply BPE to the lyrics of every valid verse of the listed songs.

    Each song named in the list file is loaded from its JSON file in
    ``verses_with_tokens``; the ``'lyrics'`` of every verse whose ``'valid'``
    entry is truthy are replaced by the output of ``apply_bpe_to_string``.
    The updated song is passed on through ``loop_and_process``.

    Args:
        codes_file: Passed to ``get_bpe_object`` to build the BPE object.
        song_list_path: Path of the list file naming the songs to process.
        out_dir: Directory argument forwarded to ``loop_and_process``.
    """
    bpe = get_bpe_object(codes_file)
    songs = read_list_from_file(song_list_path)

    def encode_song(song_name, bar):
        json_path = "{}/{}.json".format(verses_with_tokens,
                                        name_to_file_name(song_name))
        with open(json_path) as song_file:
            song_data = json.load(song_file)
        for verse in song_data['verses']:
            if not verse['valid']:
                # Invalid verses keep their lyrics untouched.
                continue
            verse['lyrics'] = apply_bpe_to_string(verse['lyrics'], bpe)
        return song_data

    def song_name_of(entry):
        # List entries already are the song names.
        return entry

    loop_and_process(songs, encode_song, "Song", song_name_of, out_dir)
Ejemplo n.º 5
0
def get_songs(name=None, csv=None):
    """Fetch lyrics for one artist, or for every artist listed in a file.

    Artists are processed through ``loop_and_process`` with
    ``artist_lyric_dir`` as target.  After each pass, artists that appear in
    the ``_LIST`` file of that directory are dropped from the work list and
    the remaining ones are retried.

    Args:
        name: A single artist name.  Ignored when ``csv`` is given.
        csv: Path of a text file with one artist name per line (despite the
            parameter name, the file is read as plain lines).

    Returns:
        ``None``.
    """
    # The work list is a plain list of names in every branch: it is iterated,
    # tested with `in`, and shrunk below, none of which behave as intended
    # on a DataFrame.
    if csv is not None:
        print("\n Getting lyrics for all artists in {}".format(csv))
        with open(csv) as openfile:
            artists = [line.strip() for line in openfile]
    elif name is not None:
        print("\n Getting lyrics for {}".format(name))
        artists = [name]
    else:
        print("No Input Artists")
        artists = []

    def process_song(song, bar):
        return {
            'title': song.title,
            'artist': song.artist,
            'lyrics': song.lyrics,
            'featured_artists': [a['name'] for a in song.featured_artists]
        }

    def get_song_name(song):
        return song.artist + artist_song_split_token + song.title

    def process_artist(name, bar):
        # `genius` is (re)bound by the retry loop below before this runs.
        artist = genius.search_artist(name)
        if artist is None:
            # NOTE(review): presumably search_artist returns None when
            # nothing matches (lyricsgenius behaviour) - confirm. Skipping
            # avoids an AttributeError that would be retried forever.
            print("No artist found for {}".format(name))
            return None
        loop_and_process(artist.songs, process_song, "Song", get_song_name,
                         raw_songs_dir)
        return None

    def get_artist_name(name):
        return name

    # NOTE(review): an artist that fails on every pass is still retried
    # without limit; confirm whether a retry cap is wanted.
    while artists:
        try:
            genius = instantiate_genius()
            loop_and_process(
                artists,
                process_artist,
                "Artist",
                get_artist_name,
                artist_lyric_dir,
            )
        except Exception as error:
            # Not a bare `except:` so that Ctrl-C / SystemExit can still
            # stop the retry loop.  Prints the exception class, as before.
            print(type(error))
        finally:
            completed_artists = set(
                read_list_from_file("{}/{}".format(artist_lyric_dir, "_LIST")))
            # Drop every occurrence of a completed artist, duplicates too.
            artists = [artist for artist in artists
                       if artist not in completed_artists]
def artist_to_raw_song_files(artists_file):
    """Turn stored per-artist JSON files into raw per-song records.

    Artist names are read, one per line, from ``artists_file``.  For each
    name the matching file in ``artist_lyric_dir`` is loaded and its
    ``"songs"`` entries are passed through ``loop_and_process`` with
    ``raw_songs_dir`` as target.

    Args:
        artists_file: Path of a text file with one artist name per line.
    """
    with open(artists_file) as openfile:
        artists = [line.strip() for line in openfile]

    def process_song(song, bar=None):
        # `bar` is optional so this callback works whether it is invoked as
        # f(item) or, like the other processors in this file, f(item, bar).
        return {
            'title': song['title'],
            'artist': song['primary_artist']['name'],
            'lyrics': song['lyrics']
        }

    def get_song_name(song):
        return song['title']

    for artist_name in tqdm(artists):
        artist_path = "{}/{}".format(artist_lyric_dir,
                                     name_to_file_name(artist_name))
        # Close the artist file before the (potentially long) song
        # processing starts.
        with open(artist_path) as jsonfile:
            artist = json.load(jsonfile)
        loop_and_process(artist["songs"], process_song, "Song", get_song_name,
                         raw_songs_dir)
Ejemplo n.º 7
0
def clean_song(song_list_path, out_dir):
    """Run ``process_song`` over every song named in a list file.

    Args:
        song_list_path: Path of the list file naming the songs to process.
        out_dir: Directory argument forwarded to ``loop_and_process``.
    """
    def song_name(entry):
        # List entries already are the song names.
        return entry

    songs = read_list_from_file(song_list_path)
    # NOTE(review): `process_song` is not defined inside this function; it
    # has to be resolvable from an enclosing scope.
    loop_and_process(songs, process_song, "Song", song_name, out_dir)
def verse_songs_extract_artists(song_list_path, dir_prefix):
    """Process the listed songs, then de-duplicate the artist list files.

    Args:
        song_list_path: Path of the list file naming the songs to process.
        dir_prefix: Directory argument forwarded to ``loop_and_process``.
    """
    songs = read_list_from_file(song_list_path)
    # NOTE(review): `process_song` and `get_song_name` are not defined inside
    # this function; they have to be resolvable from an enclosing scope.
    loop_and_process(songs, process_song, "Song", get_song_name, dir_prefix)

    # NOTE(review): de-duplication always targets `verse_artists_dir`,
    # independent of `dir_prefix` - confirm this is intended.
    for list_file in (artists_list_file, raw_artists_list_file):
        remove_duplicates_from_list_file(verse_artists_dir, list_file)
Ejemplo n.º 9
0
def fix_tokens_for_verses(song_list_path, out_dir):
    """Run ``process_song`` over every song named in a list file.

    Args:
        song_list_path: Path of the list file naming the songs to process.
        out_dir: Directory argument forwarded to ``loop_and_process``.
    """
    def song_name(entry):
        # List entries already are the song names.
        return entry

    songs = read_list_from_file(song_list_path)
    # NOTE(review): `process_song` is not defined inside this function; it
    # has to be resolvable from an enclosing scope.
    loop_and_process(songs, process_song, "Song", song_name, out_dir)