def compute_vowel_representation(self):
    '''
    Build the vowel-only view of the lyrics.

    Sets the following attributes on the instance:
        vow         Vowels of the lyrics (after ph.map_vow), in order.
        vow_idxs    For each entry of self.vow, its index in self.text.
        word_ends   For each word, the index in self.vow of its last vowel.
        words       For each word, the slice of self.text running from just
                    after the previous space up to the word's last vowel.
        line_idxs   For each character of self.text, its line number.

    When the language code starts with 'en', self.text is first saved to
    self.text_orig and replaced by the output of
    ph.get_phonetic_transcription; self.word_ends_orig and self.words_orig
    are reset to empty lists, and self.lines_orig receives the original
    text split on newlines.
    '''
    self.vow = []
    self.vow_idxs = []
    self.word_ends = []
    self.words = []
    self.line_idxs = []

    is_english = len(self.language) >= 2 and self.language[:2] == 'en'
    if is_english:
        self.text_orig = self.text
        self.text = ph.get_phonetic_transcription(
            self.text, output_fname=self.filename+'.ipa')
        self.word_ends_orig = []
        self.words_orig = []

    last_space = -1    # Position of the most recent space character
    current_line = 0   # Line number of the character being processed
    for pos, raw_char in enumerate(self.text):
        self.line_idxs.append(current_line)
        char = ph.map_vow(raw_char, self.language)

        if ph.is_vow(char, self.language):
            # A doubled vowel counts once (in English this probably only
            # concerns 'aa' as in 'bath', which rhymes with 'trap'); its
            # index is moved to the latter occurrence.
            # TODO Diphthongs should not be split (i.e. "price" should
            # not rhyme with "trap kit"). This has been fixed in BattleBot
            if pos > 0 and self.text[pos - 1] == char:
                self.vow_idxs[-1] = pos
            else:
                self.vow.append(char)
                self.vow_idxs.append(pos)
            continue

        if not ph.is_space(char):
            continue

        # A newline always starts a new line; sentence punctuation does so
        # only when it is not directly followed by a newline.
        if char in '\n' or (char in '.!?'
                            and pos < len(self.text) - 1
                            and self.text[pos + 1] != '\n'):
            current_line += 1

        # A space preceded by a non-space character closes a word.
        if self.vow and not ph.is_space(self.text[pos - 1]):
            # Trailing consonants are left out of the stored word.
            candidate = self.text[last_space + 1:self.vow_idxs[-1] + 1]
            # Words without any vowel are not recorded.
            if any(ph.is_vow(ch, self.language) for ch in candidate):
                self.word_ends.append(len(self.vow) - 1)
                self.words.append(candidate)
        last_space = pos

    if is_english:
        self.lines_orig = self.text_orig.split('\n')
def rhyme_length_fixed(self, wpos1, wpos2):
    '''
    Count how many vowels rhyme when walking backwards from two words.

    The first part of the rhyme ends with self.words[wpos1] and the
    latter part ends with self.words[wpos2].

    Input:
        wpos1   Word index of the last word in the first part of the rhyme.
        wpos2   Word index of the end of the rhyme.

    Returns the number of matching vowels. 0 is returned when wpos1 is
    negative, when both words are equal, and when only one vowel matches.
    '''
    # A negative index must not wrap around, and a word never rhymes
    # with itself.
    if wpos1 < 0 or self.words[wpos1] == self.words[wpos2]:
        return 0

    def word_edge(vowel_pos, step):
        # Walk from the vowel's character towards the start (step=-1) or
        # the end (step=+1) of self.text until the next character is a
        # space; return the index of the last non-space character reached.
        pos = self.vow_idxs[vowel_pos]
        while (0 <= pos + step <= len(self.text) - 1
               and not ph.is_space(self.text[pos + step])):
            pos += step
        return pos

    # Positions in the vowel list of the last vowel of each word
    end1 = self.word_ends[wpos1]
    end2 = self.word_ends[wpos2]

    matched = 0
    while self.vow[end1 - matched] == self.vow[end2 - matched]:
        # Once both sides have moved into earlier words, stop as soon as
        # the two vowels sit inside exactly the same word.
        if (wpos1 > 0 and end1 - matched <= self.word_ends[wpos1 - 1]
                and wpos2 > 0
                and end2 - matched <= self.word_ends[wpos2 - 1]):
            first1 = word_edge(end1 - matched, -1)
            first2 = word_edge(end2 - matched, -1)
            last1 = word_edge(end1 - matched, 1)
            last2 = word_edge(end2 - matched, 1)
            if (last1 - first1 == last2 - first2
                    and self.text[first1:last1 + 1]
                    == self.text[first2:last2 + 1]):
                break
        matched += 1
        # Stop at the start of the lyrics, or when the latter part would
        # run into the first part.
        if end1 - matched < 0 or end2 - matched <= end1:
            break

    # Ignore rhymes with length 1
    return 0 if matched == 1 else matched
def get_rhyme_str(self, rhyme_tuple):
    '''
    Construct a string of a given rhyme tuple.

    Input:
        rhyme_tuple     (rl, wpos1, wpos2): rl is the value printed as the
                        rhyme length, wpos1 and wpos2 are indices into
                        self.word_ends of the words ending the two parts.

    Returns 'Longest rhyme (l=<rl>): <text>\n', where <text> is the slice
    of self.text from the line start before the first part to the end of
    the last word. ' | ' is inserted before the first and '|' after the
    last index of each group returned by self.get_rhyming_vowels.
    Returns '' when wpos1 or wpos2 is None.
    '''
    rl, wpos1, wpos2 = rhyme_tuple
    if wpos1 is None or wpos2 is None:
        return ''

    # Find the ending of the last word. The scan is bounded so lyrics
    # that do not end in a space character cannot run off the text.
    last_idx = len(self.text) - 1
    p2 = self.vow_idxs[self.word_ends[wpos2]]
    while p2 < last_idx and not ph.is_space(self.text[p2]):
        p2 += 1

    # Clamp at the first vowel: when the rhyme starts at the very first
    # vowel of the lyrics the offset is -1, which would otherwise wrap
    # around to the last vowel of the text.
    p0 = self.vow_idxs[max(self.word_ends[wpos1] - rl, 0)]
    # Find the beginning of the line
    while self.text[p0] != '\n' and p0 > 0:
        p0 -= 1

    rw1, rw2 = self.get_rhyming_vowels(rhyme_tuple)
    openers = (min(rw1), min(rw2))
    closers = (max(rw1), max(rw2))
    pieces = []
    for i in range(p0, p2 + 1):
        if i in openers:
            pieces.append(' | ' + self.text[i])
        elif i in closers:
            pieces.append(self.text[i] + '|')
        else:
            pieces.append(self.text[i])
    cap_line = ''.join(pieces)
    return "Longest rhyme (l=%d): %s\n" % (rl, cap_line)
def rhyme_length_fixed(self, wpos1, wpos2):
    '''
    Count how many vowels rhyme when walking backwards from two words.

    The first part of the rhyme ends with self.words[wpos1] and the
    latter part ends with self.words[wpos2].

    Input:
        wpos1   Word index of the last word in the first part of the rhyme.
        wpos2   Word index of the end of the rhyme.

    Returns the number of matching vowels. 0 is returned when wpos1 is
    negative, when both words are equal, and when only one vowel matches.
    '''
    # A negative index must not wrap around, and a word never rhymes
    # with itself.
    if wpos1 < 0 or self.words[wpos1] == self.words[wpos2]:
        return 0

    def word_edge(vowel_pos, step):
        # Walk from the vowel's character towards the start (step=-1) or
        # the end (step=+1) of self.text until the next character is a
        # space; return the index of the last non-space character reached.
        pos = self.vow_idxs[vowel_pos]
        while (0 <= pos + step <= len(self.text) - 1
               and not ph.is_space(self.text[pos + step])):
            pos += step
        return pos

    # Positions in the vowel list of the last vowel of each word
    end1 = self.word_ends[wpos1]
    end2 = self.word_ends[wpos2]

    matched = 0
    while self.vow[end1 - matched] == self.vow[end2 - matched]:
        # Once both sides have moved into earlier words, stop as soon as
        # the two vowels sit inside exactly the same word.
        if (wpos1 > 0 and end1 - matched <= self.word_ends[wpos1 - 1]
                and wpos2 > 0
                and end2 - matched <= self.word_ends[wpos2 - 1]):
            first1 = word_edge(end1 - matched, -1)
            first2 = word_edge(end2 - matched, -1)
            last1 = word_edge(end1 - matched, 1)
            last2 = word_edge(end2 - matched, 1)
            if (last1 - first1 == last2 - first2
                    and self.text[first1:last1 + 1]
                    == self.text[first2:last2 + 1]):
                break
        matched += 1
        # Stop at the start of the lyrics, or when the latter part would
        # run into the first part.
        if end1 - matched < 0 or end2 - matched <= end1:
            break

    # Ignore rhymes with length 1
    return 0 if matched == 1 else matched
def find_rhymes(self, vowel):
    '''
    Collect the original-spelling words whose transcription has a vowel.

    For every entry of self.vow equal to `vowel`, the surrounding word is
    cut out of self.text, looked up in self.espeak_words, and the entry
    of self.words_orig at the same position is collected.

    Input:
        vowel   Vowel to search for, compared against entries of self.vow.

    Returns a list with one entry per matching vowel occurrence, so a
    word containing the vowel twice is listed twice.

    Raises ValueError (from the index() lookup, assuming espeak_words is
    a list) when a word cut from self.text is not in self.espeak_words.
    '''
    last_idx = len(self.text) - 1
    matching_idxs = [
        idx for idx, vow in zip(self.vow_idxs, self.vow) if vow == vowel
    ]

    rhyming_words = []
    for vow_idx in matching_idxs:
        start = end = vow_idx
        # Both scans are bounded by the ends of the text: an unbounded
        # backward scan wraps around to self.text[-1] for the first word,
        # and an unbounded forward scan overruns the text for a last word
        # that is not followed by a space.
        while start > 0 and not ph.is_space(self.text[start]):
            start -= 1
        while end < last_idx and not ph.is_space(self.text[end]):
            end += 1
        # The slice may include the delimiting characters; strip() drops
        # them when they are whitespace.
        word = self.text[start:end + 1].strip()
        rhyming_words.append(
            self.words_orig[self.espeak_words.index(word)])
    return rhyming_words
def get_rhyme_str(self, rhyme_tuple):
    '''
    Construct a string of a given rhyme tuple.

    Input:
        rhyme_tuple     (rl, wpos1, wpos2): rl is the value printed as the
                        rhyme length, wpos1 and wpos2 are indices into
                        self.word_ends of the words ending the two parts.

    Returns 'Longest rhyme (l=<rl>): <text>\n', where <text> is the slice
    of self.text from the line start before the first part to the end of
    the last word. For language 'fi' the characters at the indices given
    by self.get_rhyming_vowels are capitalized. For other languages
    ' | ' is inserted before the first and '|' after the last index of
    each group, and the matching lines of self.lines_orig are appended.
    Returns '' when wpos1 or wpos2 is None.
    '''
    rl, wpos1, wpos2 = rhyme_tuple
    if wpos1 is None or wpos2 is None:
        return ''

    # Find the ending of the last word. The scan is bounded so lyrics
    # that do not end in a space character cannot run off the text.
    last_idx = len(self.text) - 1
    p2 = self.vow_idxs[self.word_ends[wpos2]]
    while p2 < last_idx and not ph.is_space(self.text[p2]):
        p2 += 1

    # Clamp at the first vowel: when the rhyme starts at the very first
    # vowel of the lyrics the offset is -1, which would otherwise wrap
    # around to the last vowel of the text.
    p0 = self.vow_idxs[max(self.word_ends[wpos1] - rl, 0)]
    # Find the beginning of the line
    while self.text[p0] != '\n' and p0 > 0:
        p0 -= 1

    rw1, rw2 = self.get_rhyming_vowels(rhyme_tuple)
    finnish = self.language == 'fi'
    pieces = []
    if finnish:
        for i in range(p0, p2 + 1):
            if i in rw1 or i in rw2:
                pieces.append(self.text[i].capitalize())
            else:
                pieces.append(self.text[i])
    else:
        openers = (min(rw1), min(rw2))
        closers = (max(rw1), max(rw2))
        for i in range(p0, p2 + 1):
            if i in openers:
                pieces.append(' | ' + self.text[i])
            elif i in closers:
                pieces.append(self.text[i] + '|')
            else:
                pieces.append(self.text[i])
    cap_line = ''.join(pieces)

    parts = ["Longest rhyme (l={0}): {1}\n".format(int(rl), cap_line)]
    if not finnish:
        # Get the corresponding lines from the original lyrics.
        # NOTE(review): compute_vowel_representation only sets lines_orig
        # for language codes starting with 'en' - confirm that no other
        # non-'fi' language reaches this point.
        line_beg = self.line_idxs[p0]
        line_end = self.line_idxs[p2]
        for line in self.lines_orig[line_beg:line_end + 1]:
            parts.append(line + '\n')
    return ''.join(parts)
def get_rhyme_str(self, rhyme_tuple):
    '''
    Construct a string of a given rhyme tuple.

    Input:
        rhyme_tuple     (rl, wpos1, wpos2): rl is the value printed as the
                        rhyme length, wpos1 and wpos2 are indices into
                        self.word_ends of the words ending the two parts.

    Returns 'Longest rhyme (l=<rl>): <text>\n', where <text> is the slice
    of self.text from the line start before the first part to the end of
    the last word. For language 'fi' the characters at the indices given
    by self.get_rhyming_vowels are capitalized. For other languages
    ' | ' is inserted before the first and '|' after the last index of
    each group, and the matching lines of self.lines_orig are appended.
    Returns '' when wpos1 or wpos2 is None.
    '''
    rl, wpos1, wpos2 = rhyme_tuple
    if wpos1 is None or wpos2 is None:
        return ''

    # Find the ending of the last word. The scan is bounded so lyrics
    # that do not end in a space character cannot run off the text.
    last_idx = len(self.text) - 1
    p2 = self.vow_idxs[self.word_ends[wpos2]]
    while p2 < last_idx and not ph.is_space(self.text[p2]):
        p2 += 1

    # Clamp at the first vowel: when the rhyme starts at the very first
    # vowel of the lyrics the offset is -1, which would otherwise wrap
    # around to the last vowel of the text.
    p0 = self.vow_idxs[max(self.word_ends[wpos1] - rl, 0)]
    # Find the beginning of the line
    while self.text[p0] != '\n' and p0 > 0:
        p0 -= 1

    rw1, rw2 = self.get_rhyming_vowels(rhyme_tuple)
    finnish = self.language == 'fi'
    pieces = []
    if finnish:
        for i in range(p0, p2 + 1):
            if i in rw1 or i in rw2:
                pieces.append(self.text[i].capitalize())
            else:
                pieces.append(self.text[i])
    else:
        openers = (min(rw1), min(rw2))
        closers = (max(rw1), max(rw2))
        for i in range(p0, p2 + 1):
            if i in openers:
                pieces.append(' | ' + self.text[i])
            elif i in closers:
                pieces.append(self.text[i] + '|')
            else:
                pieces.append(self.text[i])
    cap_line = ''.join(pieces)

    parts = ["Longest rhyme (l=%d): %s\n" % (rl, cap_line)]
    if not finnish:
        # Get the corresponding lines from the original lyrics.
        # NOTE(review): compute_vowel_representation only sets lines_orig
        # for language codes starting with 'en' - confirm that no other
        # non-'fi' language reaches this point.
        line_beg = self.line_idxs[p0]
        line_end = self.line_idxs[p2]
        for line in self.lines_orig[line_beg:line_end + 1]:
            parts.append(line + '\n')
    return ''.join(parts)