def write_out_songs(song_names, outfile):
    """Write the lyrics of every valid verse of the given songs to ``outfile``.

    Each song is read from ``<verses_with_tokens>/<file name>.json``, where the
    file name is produced by ``name_to_file_name(song_name)``.  For every verse
    whose ``'valid'`` entry is truthy, the verse's ``'lyrics'`` string is
    written followed by a newline.

    Args:
        song_names: iterable of song names; wrapped in ``tqdm`` for progress.
        outfile: object exposing ``write(str)`` that receives the lyrics.
    """
    for name in tqdm(song_names):
        song_path = "{}/{}.json".format(verses_with_tokens,
                                        name_to_file_name(name))
        with open(song_path) as handle:
            verses = json.load(handle)['verses']
        for verse in verses:
            if not verse['valid']:
                # invalid verses are left out of the output
                continue
            outfile.write(verse['lyrics'] + '\n')
Exemple #2
0
 def process_song(song, bar):
     """Load a tokenized song and BPE-encode the lyrics of its valid verses.

     The song is read from ``<verses_with_tokens>/<file name>.json``.  For each
     verse whose ``'valid'`` entry is truthy, ``'lyrics'`` is replaced by the
     result of ``apply_bpe_to_string(lyrics, bpe)``; other verses are left
     untouched.

     Args:
         song: song name, converted to a file name by ``name_to_file_name``.
         bar: not used by this function (presumably a progress bar handed in
             by the caller -- confirm).

     Returns:
         The loaded song dictionary with the updated verses.
     """
     song_path = "{}/{}.json".format(verses_with_tokens,
                                     name_to_file_name(song))
     with open(song_path) as song_file:
         loaded = json.load(song_file)
     for verse in (v for v in loaded['verses'] if v['valid']):
         verse['lyrics'] = apply_bpe_to_string(verse['lyrics'], bpe)
     return loaded
def process_song(song, bar):
    """Attach an ``'artists'`` entry to every verse of a cleaned song.

    The song is read from ``<cleaned_verses_dir>/<file name>.json``.  Each
    verse receives the value returned by ``get_artists_from_metadata`` for the
    song title, the verse metadata, the song artist and the featured artists.

    Args:
        song: song name, converted to a file name by ``name_to_file_name``.
        bar: not used by this function (presumably a progress bar handed in
            by the caller -- confirm).

    Returns:
        The loaded song dictionary with ``'artists'`` set on every verse.
    """
    source = "{}/{}.json".format(cleaned_verses_dir, name_to_file_name(song))
    with open(source) as song_file:
        song_data = json.load(song_file)
    for verse in song_data['verses']:
        verse['artists'] = get_artists_from_metadata(
            song_data['title'],
            verse['metadata'],
            song_data['artist'],
            song_data['featured_artists'],
        )
    return song_data
def create_train_file(list_file, out_file):
    """Collect the valid verses of the listed songs into one JSON training file.

    Song names come from ``read_list_from_file(list_file)``; each song is read
    from ``<bpe_songs_dir>/<file name>.json``.  Every verse whose ``'valid'``
    entry is truthy contributes a ``{'artist_id': ..., 'lyrics': ...}`` record.
    The records are dumped as a single JSON list to ``out_file``.

    Args:
        list_file: path handed to ``read_list_from_file``.
        out_file: path of the JSON file to write (overwritten).
    """
    training_verses = []
    for name in tqdm(read_list_from_file(list_file)):
        song_path = "{}/{}.json".format(bpe_songs_dir,
                                        name_to_file_name(name))
        with open(song_path) as song_file:
            loaded = json.load(song_file)
        training_verses.extend(
            {'artist_id': verse['artist_id'], 'lyrics': verse['lyrics']}
            for verse in loaded['verses']
            if verse['valid'])
    with open(out_file, 'w') as sink:
        json.dump(training_verses, sink)
def process_song(song, bar):
    """Wrap every non-empty lyric line of a song in ``S`` / ``L`` markers.

    The song is read from ``<marked_verses_dir>/<file name>.json``.  For each
    verse the lyrics are split on newlines, each line is stripped, empty lines
    are discarded, and the remaining lines are rebuilt as
    ``'S <line> L S <line> L ...'`` with surrounding whitespace removed.

    Args:
        song: song name, converted to a file name by ``name_to_file_name``.
        bar: not used by this function (presumably a progress bar handed in
            by the caller -- confirm).

    Returns:
        The loaded song dictionary with the rewritten verse lyrics.
    """
    song_path = "{}/{}.json".format(marked_verses_dir, name_to_file_name(song))
    with open(song_path) as song_file:
        song_data = json.load(song_file)
    for verse in song_data['verses']:
        stripped = (raw.strip() for raw in verse['lyrics'].split('\n'))
        # 'S ' opens and ' L ' closes each surviving line; the final strip
        # removes the trailing space left by the last ' L '
        tagged = ''.join('S ' + text + ' L '
                         for text in stripped if text != '')
        verse['lyrics'] = tagged.strip()
    return song_data
Exemple #6
0
def analyze_verses(song_list_path, song_dir):
    """Gather verse, line and word statistics for a list of songs.

    Song names come from ``read_list_from_file(song_list_path)`` and each song
    is read from ``<song_dir>/<file name>.json``.  Counts are recorded through
    the ``update_analysis`` / ``update_artist`` helpers and the callable
    returned by ``get_update_songs()`` (all defined outside this block):

    * per song: the number of verses and one song for the cleaned primary
      artist;
    * per valid verse: the number of non-empty lines, credited to the verse's
      first artist, plus the total number of words;
    * per line: the number of whitespace-separated words.

    Results are written to ``verse_analysis.json``, ``artist_analysis.json``
    and ``song_analysis.json`` in the current working directory.

    Args:
        song_list_path: path handed to ``read_list_from_file``.
        song_dir: directory containing the per-song JSON files.
    """
    names = read_list_from_file(song_list_path)
    analysis = {'verses': {}, 'lines': {}, 'words': {}, 'words_per_verse': {}}
    artists = {}
    songs = {}
    progress = tqdm(names)
    update_songs = get_update_songs()
    for song_name in progress:
        song_path = "{}/{}.json".format(song_dir,
                                        name_to_file_name(song_name))
        with open(song_path) as song_file:
            song = json.load(song_file)
        title = song['title']
        update_analysis(analysis, 'verses', len(song['verses']), title)
        primary_artist = clean_artist_names(song['artist']).strip()
        update_artist(artists, primary_artist, 1, 0, 0)
        song_key = "{} || {}".format(song['artist'], song['title'])

        # only verses flagged as valid contribute line and word statistics
        for verse in (v for v in song['verses'] if v['valid']):
            stripped = (raw.strip() for raw in verse['lyrics'].split('\n'))
            lines = [text for text in stripped if text != '']
            line_count = len(lines)
            update_analysis(analysis, 'lines', line_count, title)
            lead_artist = verse['artists'][0]
            update_artist(artists, lead_artist, 0, 1, line_count)
            update_songs(songs, song_key, lead_artist, line_count)

            word_total = 0
            for line in lines:
                # split() without arguments already drops empty pieces
                word_count = len(line.split())
                word_total += word_count
                update_analysis(analysis, 'words', word_count, title)
            update_analysis(analysis, 'words_per_verse', word_total, title)

    # the 'songs' collections are turned into lists before dumping
    # (presumably they are sets, which json cannot serialize -- confirm
    # against update_analysis)
    for buckets in analysis.values():
        for entry in buckets.values():
            entry['songs'] = list(entry['songs'])

    reports = (
        ("verse_analysis.json", analysis),
        ("artist_analysis.json", artists),
        ("song_analysis.json", songs),
    )
    for report_name, payload in reports:
        with open(report_name, "w") as outfile:
            json.dump(payload, outfile)
Exemple #7
0
def analyze_characters(dir_path, list_file, input_format, out_file):
    """Record every unusual character found in the lyrics of the listed songs.

    Song names are read from ``<dir_path>/<list_file>`` via
    ``read_list_from_file``; each song is loaded from
    ``<dir_path>/<file name>.json``.  The lyric blocks returned by
    ``get_lyric_blocks(song, input_format)`` are scanned character by
    character.  A character is recorded when it is not an ASCII letter or
    digit and is not in ``char_allow_list``.  For each such character the
    result stores a running ``"count"`` and a ``"context"`` list with the song
    name and ``get_context(lyrics, index)`` of every occurrence.

    The result is dumped to ``<out_file>.json`` and the elapsed time together
    with the number of songs is printed.

    Args:
        dir_path: directory holding the list file and the song JSON files.
        list_file: name of the song list file inside ``dir_path``.
        input_format: passed through to ``get_lyric_blocks``.
        out_file: output path without the ``.json`` extension.
    """
    song_list = read_list_from_file("{}/{}".format(dir_path, list_file))
    character_dict = {}
    # compiled once instead of going through re.search for every character
    non_alnum = re.compile(r'[^a-zA-Z0-9]+')
    start = time.time()
    for song_name in tqdm(song_list):
        song_file_name = name_to_file_name(song_name.strip())
        with open('{}/{}.json'.format(dir_path, song_file_name)) as jsonfile:
            song = json.load(jsonfile)
        # the file is closed before scanning; only the parsed song is needed
        for lyrics in get_lyric_blocks(song, input_format):
            for i, c in enumerate(lyrics):
                if non_alnum.search(c) is None or c in char_allow_list:
                    continue
                # first sighting creates the record, later ones extend it
                entry = character_dict.setdefault(
                    c, {"count": 0, "context": []})
                entry["count"] += 1
                entry["context"].append({
                    "song": song_name,
                    "line": get_context(lyrics, i)
                })
    with open("{}.json".format(out_file), "w") as openfile:
        json.dump(character_dict, openfile)
    time_taken = str(datetime.timedelta(seconds=time.time() - start))
    print("{} for {}".format(time_taken, len(song_list)))
Exemple #8
0
def process_song(song, bar):
    """Clean the lyrics of every verse and drop verses that end up empty.

    The song is read from ``<verse_split_songs_dir>/<file name>.json``.  Each
    verse's lyrics are replaced by ``clean_lyrics(lyrics)``.  Verses whose
    cleaned lyrics are blank are removed from the song, and a
    ``'<title> || <metadata>'`` line is appended to
    ``<cleaned_verses_dir>/<removed_verse_metadata_file>`` for each of them.

    Args:
        song: song name, converted to a file name by ``name_to_file_name``.
        bar: not used by this function (presumably a progress bar handed in
            by the caller -- confirm).

    Returns:
        The loaded song dictionary containing only the non-empty verses.
    """
    song_path = "{}/{}.json".format(verse_split_songs_dir,
                                    name_to_file_name(song))
    with open(song_path) as song_file:
        song_data = json.load(song_file)
    kept_verses = []
    for verse in song_data['verses']:
        cleaned = clean_lyrics(verse['lyrics'])
        verse['lyrics'] = cleaned
        if cleaned.strip() != '':
            # still has content after cleaning: keep it
            kept_verses.append(verse)
            continue
        # the verse became empty: log which one was removed
        removed_log = "{}/{}".format(cleaned_verses_dir,
                                     removed_verse_metadata_file)
        with open(removed_log, 'a') as log_file:
            log_file.write('{} || {}\n'.format(song_data['title'],
                                               verse['metadata']))
    song_data['verses'] = kept_verses
    return song_data
    def process_song(song, bar):
        """Split a cleaned song's lyrics into verses at their ``[...]`` headers.

        The song is read from ``<cleaned_songs_dir>/<file name>.json``.  Every
        ``[`` starts a new verse: the text from ``[`` up to and including the
        next ``]`` becomes the verse ``'metadata'`` and the text that follows,
        up to the next ``[``, becomes its ``'lyrics'``.  Text before the first
        header has no metadata and is discarded.  A header without a closing
        ``]`` takes the rest of the text as metadata.

        Args:
            song: song name, converted to a file name by
                ``name_to_file_name``.
            bar: not used by this function (presumably a progress bar handed
                in by the caller -- confirm).

        Returns:
            A new dictionary with ``'title'``, ``'verses'``, ``'artist'`` and
            ``'featured_artists'``.
        """
        source = "{}/{}.json".format(cleaned_songs_dir,
                                     name_to_file_name(song))
        with open(source) as song_file:
            song = json.load(song_file)
        lyrics = song['lyrics']
        verses = []
        header = ''
        cursor = 0
        while True:
            open_at = lyrics.find('[', cursor)
            if open_at == -1:
                # no further header: the remainder belongs to the last verse
                body = lyrics[cursor:]
                break
            body = lyrics[cursor:open_at]
            # a new header closes the verse collected so far; text collected
            # while no header is pending is dropped
            if header.strip():
                verses.append({'metadata': header, 'lyrics': body})
            close_at = lyrics.find(']', open_at)
            if close_at == -1:
                # unterminated header: it swallows the rest of the text
                header = lyrics[open_at:]
                cursor = len(lyrics)
            else:
                header = lyrics[open_at:close_at + 1]
                cursor = close_at + 1
        # flush the verse that was still open when the text ended
        if header.strip():
            verses.append({'metadata': header, 'lyrics': body})
        return {
            'title': song['title'],
            'verses': verses,
            'artist': song['artist'],
            'featured_artists': song['featured_artists']
        }
def artist_to_raw_song_files(artists_file):
    """Turn each listed artist's JSON file into individual raw song records.

    Artist names are read one per line from ``artists_file`` (surrounding
    whitespace stripped).  For each artist,
    ``<artist_lyric_dir>/<file name>`` is loaded and its ``"songs"`` list is
    handed to ``loop_and_process`` together with a converter producing
    ``{'title', 'artist', 'lyrics'}`` records, the label ``"Song"``, a
    function returning the song title, and ``raw_songs_dir``.

    Args:
        artists_file: path of the text file listing the artist names.
    """

    def process_song(song):
        # keep only the fields needed downstream
        return {
            'title': song['title'],
            'artist': song['primary_artist']['name'],
            'lyrics': song['lyrics']
        }

    def get_song_name(song):
        return song['title']

    with open(artists_file) as openfile:
        artist_names = [line.strip() for line in openfile]

    for artist_name in tqdm(artist_names):
        artist_path = "{}/{}".format(artist_lyric_dir,
                                     name_to_file_name(artist_name))
        with open(artist_path) as jsonfile:
            songs = json.load(jsonfile)["songs"]
            loop_and_process(songs, process_song, "Song", get_song_name,
                             raw_songs_dir)
 def process_song(song, bar):
     """Flag each verse of a song as valid or not and number its artist.

     The song is read from ``<verse_artists_dir>/<file name>.json``.  Songs
     whose cleaned primary artist is not in ``artist_list`` are rejected.  A
     verse is valid when ``is_verse_artist_valid``, ``is_verse_type_valid``
     and ``has_enough_lines`` all accept it and its lyrics have not already
     appeared earlier in the same song.  Valid verses get an ``'artist_id'``
     equal to the 1-based position of their first artist in ``artist_list``.

     Args:
         song: song name, converted to a file name by ``name_to_file_name``.
         bar: not used by this function (presumably a progress bar handed in
             by the caller -- confirm).

     Returns:
         The loaded song dictionary when at least one verse is valid,
         otherwise ``False``.
     """
     song_path = "{}/{}.json".format(verse_artists_dir,
                                     name_to_file_name(song))
     with open(song_path) as song_file:
         song = json.load(song_file)
     # TODO: we just add this here for ease, but it should be moved
     # somewhere else
     # songs by someone outside our artist list are removed
     primary_artist = clean_artist_names(song['artist']).strip()
     if primary_artist not in artist_list:
         return False
     seen_lyrics = set()
     for verse in song['verses']:
         is_valid = (is_verse_artist_valid(verse, artist_list)
                     and is_verse_type_valid(verse)
                     and has_enough_lines(verse)
                     and verse['lyrics'] not in seen_lyrics)
         verse['valid'] = is_valid
         # lyrics are remembered even for invalid verses, so a repeat of an
         # invalid verse is rejected as well
         seen_lyrics.add(verse['lyrics'])
         if is_valid:
             verse['artist_id'] = artist_list.index(verse['artists'][0]) + 1
     if any(verse['valid'] for verse in song['verses']):
         return song
     # if all verses are invalid, remove the song
     return False
def process_song(song, bar):
    """Load a raw song and replace its lyrics with their cleaned form.

    The song is read from ``<raw_songs_dir>/<file name>.json`` and its
    ``'lyrics'`` entry is replaced by ``clean_lyrics(lyrics)``.

    Args:
        song: song name, converted to a file name by ``name_to_file_name``.
        bar: not used by this function (presumably a progress bar handed in
            by the caller -- confirm).

    Returns:
        The loaded song dictionary with cleaned lyrics.
    """
    raw_path = "{}/{}.json".format(raw_songs_dir, name_to_file_name(song))
    with open(raw_path) as song_file:
        loaded = json.load(song_file)
    loaded['lyrics'] = clean_lyrics(loaded['lyrics'])
    return loaded