Python Lyrics.get_avg_rhyme_length Examples

Programming Language: Python

Namespace/Package Name: lyrics

Class/Type: Lyrics

Method/Function: get_avg_rhyme_length

Examples at hotexamples.com: 3

Python Lyrics.get_avg_rhyme_length - 3 examples found. These are the top-rated real-world Python examples of lyrics.Lyrics.get_avg_rhyme_length, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Lyrics(12)

about(3)

onChangeTokens(1)

setLyricRequest(1)

scrape(1)

save(1)

refresh(1)

populateLyrics(1)

get_lyrics(1)

get_rhymes(1)

__init__(1)

get_longest_rhyme(1)

get_avg_rhyme_length(1)

find(1)

copyLyricsIfSameTrack(1)

addlyric(1)

setProvider(1)

Example #1

Show file

File: raplyzer.py Project: d-kz/raplysaattori

def read_lyrics(lyrics_dir='lyrics_en', artist=None, album=None,
                print_stats=False, language='en-us', lookback=15):
    '''
    Read lyrics and compute Rhyme factor (riimikerroin) for each
    artist, then print the longest rhymes, the best songs and the
    artist ranking to stdout. Nothing is returned.

    The expected layout is lyrics_dir/artist/album/song.txt.

    Input:
        lyrics_dir  Path to the directory containing the lyrics.
        artist      Name of the artist directory under lyrics_dir (if this is
                    not provided, all artists are analyzed).
        album       Name of the album directory under lyrics_dir/artist/
                    (if this is not provided, all albums are analyzed).
        print_stats Whether we print summary statistics for each individual
                    song (passed through to Lyrics).
        language    Use either Finnish (fi), American English (en-us),
                    or English (en).
        lookback    How many previous words are checked for rhymes. For
                    Finnish I've used 10 and for English 15.
    '''
    if artist is not None:
        artists = [artist]
    else:
        artists = os.listdir(lyrics_dir)

    max_rhymes = 5   # How many of the longest rhymes are remembered
    min_w = 20000    # Vocabulary size is measured over this many words
    # Everything that is not a word character or one of å/ä/ö is turned
    # into a separator. Compiled once instead of once per song.
    word_sep = re.compile(u'[^\wåäö]+')

    ranked_artists = []   # Artists that had at least one song
    artist_scores = []    # Mean rhyme length, parallel to ranked_artists
    uniq_words = []       # Parallel to ranked_artists
    song_scores = []
    song_names = []
    longest_rhymes = []   # Min-heap holding the max_rhymes longest rhymes

    for a in artists:
        print("Analyzing artist: %s" % a)
        rls = []
        all_words = []
        if album is not None:
            albums = [album]
        else:
            albums = os.listdir(os.path.join(lyrics_dir, a))
            albums = sort_albums_by_year(albums)
        for al in albums:
            songs = os.listdir(os.path.join(lyrics_dir, a, al))
            # Only the .txt files
            songs = [s for s in songs if s.endswith('.txt')]
            for song in songs:
                file_name = os.path.join(lyrics_dir, a, al, song)
                song_lyrics = Lyrics(file_name, print_stats=print_stats,
                                     language=language, lookback=lookback)
                rl = song_lyrics.get_avg_rhyme_length()
                rls.append(rl)
                song_scores.append(rl)
                song_names.append(file_name)

                # Once the heap is full, pushing and popping in one step
                # drops the smallest entry and keeps the longest rhymes.
                longest = song_lyrics.get_longest_rhyme()
                if len(longest_rhymes) < max_rhymes:
                    heapq.heappush(longest_rhymes, longest)
                else:
                    heapq.heappushpop(longest_rhymes, longest)

                if language == 'fi':
                    all_words += song_lyrics.text.split()
                else:
                    text = song_lyrics.text_orig.lower()
                    all_words += word_sep.sub(' ', text).split()

        if not rls:
            # The mean of an empty list is NaN, and argsort places NaN last,
            # so after reversing such an artist would be ranked first.
            print("No lyrics found for artist: %s" % a)
            continue

        # Compute the number of unique words the artist has used. Artists
        # with fewer than min_w words get the negated word count instead.
        # NOTE: uniq_words is collected but not printed by this function.
        n_words = len(all_words)
        if n_words >= min_w:
            uniq_words.append(len(set(all_words[:min_w])))
        else:
            uniq_words.append(-n_words)
        ranked_artists.append(a)
        artist_scores.append(np.mean(rls))

    # Sort the artists based on their avg rhyme lengths (best first)
    artist_scores = np.array(artist_scores)
    order = np.argsort(artist_scores)[::-1]
    uniq_words = np.array(uniq_words)[order]

    # Popping a min-heap yields the kept rhymes from shortest to longest.
    print("\nBest rhymes")
    while longest_rhymes:
        _length, rhyme = heapq.heappop(longest_rhymes)
        print(rhyme)

    # One permutation is used for both names and scores so that every
    # printed score belongs to the file name next to it.
    print("\nBest songs:")
    song_scores = np.array(song_scores)
    song_order = np.argsort(song_scores)[::-1]
    for idx in song_order[:10]:
        print('%.3f\t%s' % (song_scores[idx], song_names[idx]))

    print("\nBest artists:")
    for rank, idx in enumerate(order):
        # Directory names use underscores in place of spaces
        name = ranked_artists[idx].replace('_', ' ')
        print('%d.\t%.3f\t%s' % (rank + 1, artist_scores[idx], name))

Example #2

Show file

File: raplyzer.py Project: pjh5/nml502_spring2016

def read_lyrics(lyrics_dir='lyrics',
				print_stats=False, language='en-us', lookback=30):
	'''
	Read lyrics and write the rhyme statistics of every song to
	raplyzer_out.csv in the current working directory.

	The expected layout is lyrics_dir/artist/song.txt. The CSV has the
	columns Artist, Song, Longest Rhyme Length and Average Rhyme. Songs
	that cannot be analyzed are reported on stdout and left out of the
	CSV. Nothing is returned.

	Input:
		lyrics_dir  Path to the directory containing the lyrics.
		print_stats Whether we print summary statistics for each individual
					song (passed through to Lyrics).
		language    Use either Finnish (fi), American English (en-us),
					or English (en).
		lookback    How many previous words are checked for rhymes.
	'''

	# Set up CSV file to add the stats of each song to. The file is opened
	# once and the same writer is used for the header and all rows, so
	# every line ends with '\n'. Binary mode is what the Python 2 csv
	# module expects.
	with open('raplyzer_out.csv', 'wb') as csvfile:
		csvwriter = csv.writer(csvfile, delimiter=',', lineterminator='\n',
								quotechar='|', quoting=csv.QUOTE_MINIMAL)
		csvwriter.writerow(["Artist", "Song", "Longest Rhyme Length", "Average Rhyme"])

		for a in os.listdir(lyrics_dir):
			artist_dir = os.path.join(lyrics_dir, a)
			# Stray files next to the artist directories would make
			# os.listdir() below raise and abort the whole run.
			if not os.path.isdir(artist_dir):
				continue
			print("Analyzing artist: %s" % a)

			songs = os.listdir(artist_dir)
			songs = [s for s in songs if len(s) > 4 and s.endswith('.txt')]
			for song in songs:
				file_name = os.path.join(artist_dir, song)
				try:
					l = Lyrics(file_name, print_stats=print_stats,
								language=language, lookback=lookback)
					long_r = l.get_longest_rhyme()
					avg_r = l.get_avg_rhyme_length()

				# Exception reading the file, scrap it and move on.
				# Exception (not a bare except) keeps Ctrl-C working.
				except Exception:
					print('Exception reading file  %s' % file_name)
					print('\tException: %s' % sys.exc_info()[0])
					continue

				# Song file successfully read
				print("\n%s -- %s" % (a, song))

				# Add the statistics to the csv file. long_r[0] is written
				# as the rhyme length. Flushing after each row keeps the
				# finished rows on disk if a later song aborts the run.
				csvwriter.writerow([a, song, long_r[0], avg_r])
				csvfile.flush()

Example #3

Show file

File: raplyzer.py Project: ekQ/raplysaattori

def read_lyrics(lyrics_dir='lyrics_en', artist=None, album=None,
                print_stats=False, language='en-us', lookback=15):
    '''
    Read lyrics and compute Rhyme factor (riimikerroin) for each
    artist, then print the longest rhymes, the best songs and the
    artist ranking to stdout. Nothing is returned.

    The expected layout is lyrics_dir/artist/album/song.txt.

    Input:
        lyrics_dir  Path to the directory containing the lyrics.
        artist      Name of the artist directory under lyrics_dir (if this is
                    not provided, all artists are analyzed).
        album       Name of the album directory under lyrics_dir/artist/
                    (if this is not provided, all albums are analyzed).
        print_stats Whether we print summary statistics for each individual
                    song (passed through to Lyrics).
        language    Use either Finnish (fi), American English (en-us),
                    or English (en).
        lookback    How many previous words are checked for rhymes. For
                    Finnish I've used 10 and for English 15.
    '''
    if artist is not None:
        artists = [artist]
    else:
        artists = os.listdir(lyrics_dir)

    max_rhymes = 5   # How many of the longest rhymes are remembered
    min_w = 20000    # Vocabulary size is measured over this many words
    # Everything that is not a word character or one of å/ä/ö is turned
    # into a separator. Compiled once instead of once per song.
    word_sep = re.compile(u'[^\wåäö]+')

    ranked_artists = []   # Artists that had at least one song
    artist_scores = []    # Mean rhyme length, parallel to ranked_artists
    uniq_words = []       # Parallel to ranked_artists
    song_scores = []
    song_names = []
    longest_rhymes = []   # Min-heap holding the max_rhymes longest rhymes

    for a in artists:
        print("Analyzing artist: %s" % a)
        rls = []
        all_words = []
        if album is not None:
            albums = [album]
        else:
            albums = os.listdir(os.path.join(lyrics_dir, a))
            albums = sort_albums_by_year(albums)
        for al in albums:
            songs = os.listdir(os.path.join(lyrics_dir, a, al))
            # Only the .txt files
            songs = [s for s in songs if s.endswith('.txt')]
            for song in songs:
                file_name = os.path.join(lyrics_dir, a, al, song)
                song_lyrics = Lyrics(file_name, print_stats=print_stats,
                                     language=language, lookback=lookback)
                rl = song_lyrics.get_avg_rhyme_length()
                rls.append(rl)
                song_scores.append(rl)
                song_names.append(file_name)

                # Once the heap is full, pushing and popping in one step
                # drops the smallest entry and keeps the longest rhymes.
                longest = song_lyrics.get_longest_rhyme()
                if len(longest_rhymes) < max_rhymes:
                    heapq.heappush(longest_rhymes, longest)
                else:
                    heapq.heappushpop(longest_rhymes, longest)

                if language == 'fi':
                    all_words += song_lyrics.text.split()
                else:
                    text = song_lyrics.text_orig.lower()
                    all_words += word_sep.sub(' ', text).split()

        if not rls:
            # The mean of an empty list is NaN, and argsort places NaN last,
            # so after reversing such an artist would be ranked first.
            print("No lyrics found for artist: %s" % a)
            continue

        # Compute the number of unique words the artist has used. Artists
        # with fewer than min_w words get the negated word count instead.
        # NOTE: uniq_words is collected but not printed by this function.
        n_words = len(all_words)
        if n_words >= min_w:
            uniq_words.append(len(set(all_words[:min_w])))
        else:
            uniq_words.append(-n_words)
        ranked_artists.append(a)
        artist_scores.append(np.mean(rls))

    # Sort the artists based on their avg rhyme lengths (best first)
    artist_scores = np.array(artist_scores)
    order = np.argsort(artist_scores)[::-1]
    uniq_words = np.array(uniq_words)[order]

    # Popping a min-heap yields the kept rhymes from shortest to longest.
    print("\nBest rhymes")
    while longest_rhymes:
        _length, rhyme = heapq.heappop(longest_rhymes)
        print(rhyme)

    # One permutation is used for both names and scores so that every
    # printed score belongs to the file name next to it.
    print("\nBest songs:")
    song_scores = np.array(song_scores)
    song_order = np.argsort(song_scores)[::-1]
    for idx in song_order[:10]:
        print('%.3f\t%s' % (song_scores[idx], song_names[idx]))

    print("\nBest artists:")
    for rank, idx in enumerate(order):
        # Directory names use underscores in place of spaces
        name = ranked_artists[idx].replace('_', ' ')
        print('%d.\t%.3f\t%s' % (rank + 1, artist_scores[idx], name))