예제 #1
0
    def compute_vowel_representation(self):
        '''
        Build the vowel-only view of the lyrics.

        Scans self.text one character at a time and fills in:
            self.vow        Mapped vowels, in order of appearance.
            self.vow_idxs   For each entry of self.vow, its index in self.text.
            self.word_ends  For each recorded word, the index in self.vow of
                            its last vowel.
            self.words      The recorded words, cut after their last vowel.
            self.line_idxs  For each character of self.text, its line index.

        When the language code starts with 'en', the original text is kept in
        self.text_orig, self.text is replaced by its phonetic transcription,
        and self.lines_orig receives the original text split into lines.
        '''
        self.vow = []
        self.vow_idxs = []
        self.word_ends = []
        self.words = []
        self.line_idxs = []

        # A slice shorter than two characters can never equal 'en'
        is_english = self.language[:2] == 'en'
        if is_english:
            self.text_orig = self.text
            self.text = ph.get_phonetic_transcription(self.text, output_fname=self.filename+'.ipa')
            self.word_ends_orig = []
            self.words_orig = []

        last_sep = -1  # Position of the most recent separator character
        line_no = 0  # Line number assigned to the character being visited
        for pos, raw in enumerate(self.text):
            self.line_idxs.append(line_no)
            ch = ph.map_vow(raw, self.language)

            if ph.is_vow(ch, self.language):
                # A vowel repeating the previous character is collapsed; the
                # stored index moves to the later occurrence
                if pos > 0 and self.text[pos-1] == ch:
                    self.vow_idxs[-1] = pos
                    continue
                # TODO Diftongs should not be split (i.e. "price" should
                # not rhyme with "trap kit"). This has been fixed in BattleBot
                self.vow.append(ch)
                self.vow_idxs.append(pos)
                continue

            if not ph.is_space(ch):
                continue

            # Separator: newlines always start a new line, sentence-ending
            # punctuation does so unless a newline follows immediately
            if ch in '\n':
                line_no += 1
            elif ch in '.!?' and pos < len(self.text)-1 and self.text[pos+1] != '\n':
                line_no += 1

            # A separator right after a non-separator closes a word
            if self.vow and not ph.is_space(self.text[pos-1]):
                # Trailing consonants are dropped from the stored word
                candidate = self.text[last_sep+1:self.vow_idxs[-1]+1]
                # Only words containing at least one vowel are recorded
                if any(ph.is_vow(part, self.language) for part in candidate):
                    self.word_ends.append(len(self.vow)-1)
                    self.words.append(candidate)
            last_sep = pos

        if is_english:
            self.lines_orig = self.text_orig.split('\n')
예제 #2
0
    def compute_vowel_representation(self):
        '''
        Reduce the lyrics to their vowels and record where words and lines fall.

        Results are stored on the instance:
            vow         mapped vowels in reading order
            vow_idxs    position in self.text of every stored vowel
            word_ends   position in vow of the final vowel of every word
            words       every word, truncated after its final vowel
            line_idxs   line number of every character in self.text

        For language codes beginning with 'en' the text is first replaced by
        its phonetic transcription; the untouched text stays available as
        text_orig and, split on newlines, as lines_orig.
        '''
        self.vow, self.vow_idxs = [], []
        self.word_ends, self.words = [], []
        self.line_idxs = []

        english = len(self.language) >= 2 and self.language[:2] == 'en'
        if english:
            self.text_orig = self.text
            self.text = ph.get_phonetic_transcription(self.text, output_fname=self.filename+'.ipa')
            self.word_ends_orig = []
            self.words_orig = []

        def contains_vowel(chunk):
            # True as soon as one element of chunk is a vowel
            for piece in chunk:
                if ph.is_vow(piece, self.language):
                    return True
            return False

        word_start = 0  # First position after the latest separator
        current_line = 0  # Line number of the character under inspection
        for idx, raw in enumerate(self.text):
            self.line_idxs.append(current_line)
            sym = ph.map_vow(raw, self.language)
            if ph.is_vow(sym, self.language):
                doubled = idx > 0 and self.text[idx-1] == sym
                if doubled:
                    # Keep a single entry for a doubled vowel, pointing at
                    # the latter of the two characters
                    self.vow_idxs[-1] = idx
                else:
                    # TODO Diftongs should not be split (i.e. "price" should
                    # not rhyme with "trap kit"). This has been fixed in BattleBot
                    self.vow.append(sym)
                    self.vow_idxs.append(idx)
            elif ph.is_space(sym):
                if sym in '\n':
                    current_line += 1
                elif sym in '.!?' and idx < len(self.text)-1 and self.text[idx+1] != '\n':
                    current_line += 1
                # The word ends here if the preceding character was not
                # itself a separator
                if len(self.vow) > 0 and not ph.is_space(self.text[idx-1]):
                    # Cut the word right after its last vowel, ignoring any
                    # consonants that follow
                    word = self.text[word_start:self.vow_idxs[-1]+1]
                    # Vowel-less fragments are not stored
                    if contains_vowel(word):
                        self.word_ends.append(len(self.vow)-1)
                        self.words.append(word)
                word_start = idx + 1

        if english:
            self.lines_orig = self.text_orig.split('\n')
예제 #3
0
    def rhyme_length_fixed(self, wpos1, wpos2):
        '''
        Count how many vowels match, going backwards, between two rhyme parts.

        Input:
            wpos1       Index in self.words of the word ending the first part.
            wpos2       Index in self.words of the word ending the second part.

        Returns the number of matching vowels. The result is 0 when wpos1 is
        negative, when both words are identical, or when only a single vowel
        matches.
        '''
        # A negative index would wrap around; identical words do not count
        if wpos1 < 0 or self.words[wpos1] == self.words[wpos2]:
            return 0

        def word_bounds(char_idx):
            # First and last character index of the word around char_idx
            first = last = char_idx
            while first > 0 and not ph.is_space(self.text[first - 1]):
                first -= 1
            while last < len(self.text) - 1 and not ph.is_space(
                    self.text[last + 1]):
                last += 1
            return first, last

        # Positions in the vowel list of the last vowel of each word
        end1 = self.word_ends[wpos1]
        end2 = self.word_ends[wpos2]
        matched = 0
        while self.vow[end1 - matched] == self.vow[end2 - matched]:
            v1 = end1 - matched
            v2 = end2 - matched
            # Once both sides have moved into an earlier word, stop if those
            # two words are exactly the same text
            if (wpos1 > 0 and v1 <= self.word_ends[wpos1 - 1]
                    and wpos2 > 0 and v2 <= self.word_ends[wpos2 - 1]):
                first1, last1 = word_bounds(self.vow_idxs[v1])
                first2, last2 = word_bounds(self.vow_idxs[v2])
                same_length = last1 - first1 == last2 - first2
                if same_length and self.text[first1:last1 + 1] == self.text[
                        first2:last2 + 1]:
                    break

            matched += 1
            # Stop at the start of the vowel list, or when the second part
            # would reach back into the end of the first part
            if end1 - matched < 0 or end2 - matched <= end1:
                break
        # A single matching vowel is not counted as a rhyme
        return 0 if matched == 1 else matched
예제 #4
0
    def get_rhyme_str(self, rhyme_tuple):
        '''
        Construct a string of a given rhyme tuple.

        Input:
            rhyme_tuple     (rl, wpos1, wpos2): rl is used as the rhyme length
                            in vowels, wpos1 and wpos2 as indices into
                            self.word_ends of the words ending each part.

        Returns '' when either word index is None. Otherwise returns one line
        of the form "Longest rhyme (l=<rl>): <text>" terminated by a newline,
        where <text> runs from the start of the line holding the first part
        to the end of the last word, with ' | ' inserted before the smallest
        and '|' after the largest index of each group returned by
        self.get_rhyming_vowels.
        '''
        rl, wpos1, wpos2 = rhyme_tuple
        if wpos1 is None or wpos2 is None:
            return ''

        text = self.text
        last_idx = len(text) - 1

        # Find the ending of the last word. The scan is bounded so that a
        # final word without a trailing separator cannot index past the text
        p2 = self.vow_idxs[self.word_ends[wpos2]]
        while p2 < last_idx and not ph.is_space(text[p2]):
            p2 += 1

        # Find the beginning of the line containing the first rhyming vowel
        p0 = self.vow_idxs[self.word_ends[wpos1] - rl]
        while text[p0] != '\n' and p0 > 0:
            p0 -= 1

        # Presumably the character indices of the rhyming vowels of each
        # part; they are only compared against text positions here
        rw1, rw2 = self.get_rhyming_vowels(rhyme_tuple)
        # The marker positions do not change inside the loop
        opening = (min(rw1), min(rw2))
        closing = (max(rw1), max(rw2))

        pieces = []
        for i in range(p0, p2 + 1):
            if i in opening:
                pieces.append(' | ' + text[i])
            elif i in closing:
                pieces.append(text[i] + '|')
            else:
                pieces.append(text[i])
        return "Longest rhyme (l=%d): %s\n" % (rl, ''.join(pieces))
예제 #5
0
    def rhyme_length_fixed(self, wpos1, wpos2):
        '''
        Measure a rhyme in vowels by comparing two word endings backwards.

        Input:
            wpos1       Word index of the last word in the first part of the rhyme.
            wpos2       Word index of the last word in the second part of the rhyme.

        Returns the number of consecutive equal vowels found when walking
        backwards from both word ends. Gives 0 for a negative wpos1, for two
        identical words, and for a match of exactly one vowel.
        '''
        if wpos1 < 0: # Don't wrap
            return 0
        if self.words[wpos1] == self.words[wpos2]:
            return 0

        def word_start(pos):
            # Move left until the preceding character is a separator
            while pos > 0 and not ph.is_space(self.text[pos-1]):
                pos -= 1
            return pos

        def word_stop(pos):
            # Move right until the following character is a separator
            while pos < len(self.text)-1 and not ph.is_space(self.text[pos+1]):
                pos += 1
            return pos

        # Indices in the vowel list
        tail1 = self.word_ends[wpos1]
        tail2 = self.word_ends[wpos2]
        count = 0
        while True:
            cur1 = tail1 - count
            cur2 = tail2 - count
            if self.vow[cur1] != self.vow[cur2]:
                break
            # Make sure that exactly same words are not used
            crossed_both = (wpos1 > 0 and cur1 <= self.word_ends[wpos1-1]
                            and wpos2 > 0 and cur2 <= self.word_ends[wpos2-1])
            if crossed_both:
                # Character spans of the words around the two current vowels
                left1 = word_start(self.vow_idxs[cur1])
                left2 = word_start(self.vow_idxs[cur2])
                right1 = word_stop(self.vow_idxs[cur1])
                right2 = word_stop(self.vow_idxs[cur2])
                if right1-left1 == right2-left2 and self.text[left1:right1+1] == self.text[left2:right2+1]:
                    break

            count += 1
            if tail1-count < 0 or tail2-count <= tail1:
                break
        # Ignore rhymes with length 1
        if count == 1:
            return 0
        return count
    def find_rhymes(self, vowel):
        '''
        Collect the original words that contain the given vowel.

        For every entry of self.vow equal to vowel, the surrounding word is
        cut out of self.text, looked up in self.espeak_words with index(),
        and the entry at the same position in self.words_orig is collected.
        A word containing the vowel several times is collected once per
        occurrence.

        Input:
            vowel       Vowel to look for, compared against entries of self.vow.

        Returns the list of matching entries from self.words_orig, in text
        order. The index() lookup fails (ValueError for a list) if a word is
        not present in self.espeak_words.
        '''
        text = self.text
        last_idx = len(text) - 1
        rhyming_words = []
        for vow, vow_idx in zip(self.vow, self.vow_idxs):
            if vow != vowel:
                continue
            start = end = vow_idx
            # Both scans are bounded: without the bounds, a word at the very
            # start of the text made start go negative and wrap around to
            # the end of the text, and a word at the very end indexed past it
            while start > 0 and not ph.is_space(text[start]):
                start -= 1
            while end < last_idx and not ph.is_space(text[end]):
                end += 1

            # The slice may include the separators on both sides; strip()
            # removes them when they are whitespace
            word = text[start:end + 1].strip()
            rhyming_words.append(
                self.words_orig[self.espeak_words.index(word)])
        return rhyming_words
예제 #7
0
    def get_rhyme_str(self, rhyme_tuple):
        '''
        Construct a string of a given rhyme tuple.

        Input:
            rhyme_tuple     (rl, wpos1, wpos2): rl is used as the rhyme length
                            in vowels, wpos1 and wpos2 as indices into
                            self.word_ends of the words ending each part.

        Returns '' when either word index is None. Otherwise the result
        starts with the line "Longest rhyme (l=<rl>): <text>", where <text>
        runs from the start of the line holding the first part to the end of
        the last word. When self.language is 'fi' the characters at the
        indices returned by self.get_rhyming_vowels are capitalized;
        otherwise ' | ' is inserted before the smallest and '|' after the
        largest index of each group, and the corresponding lines of
        self.lines_orig are appended, one per line.
        '''
        rl, wpos1, wpos2 = rhyme_tuple
        if wpos1 is None or wpos2 is None:
            return ''

        text = self.text
        last_idx = len(text) - 1

        # Find the ending of the last word. The scan is bounded so that a
        # final word without a trailing separator cannot index past the text
        p2 = self.vow_idxs[self.word_ends[wpos2]]
        while p2 < last_idx and not ph.is_space(text[p2]):
            p2 += 1

        # Find the beginning of the line containing the first rhyming vowel
        p0 = self.vow_idxs[self.word_ends[wpos1]-rl]
        while text[p0] != '\n' and p0 > 0:
            p0 -= 1

        # Presumably the character indices of the rhyming vowels of each
        # part; they are only compared against text positions here
        rw1, rw2 = self.get_rhyming_vowels(rhyme_tuple)
        is_finnish = self.language == 'fi'

        pieces = []
        if is_finnish:
            for i in range(p0, p2+1):
                if i in rw1 or i in rw2:
                    pieces.append(text[i].capitalize())
                else:
                    pieces.append(text[i])
        else:
            # The marker positions do not change inside the loop
            opening = (min(rw1), min(rw2))
            closing = (max(rw1), max(rw2))
            for i in range(p0, p2+1):
                if i in opening:
                    pieces.append(' | ' + text[i])
                elif i in closing:
                    pieces.append(text[i] + '|')
                else:
                    pieces.append(text[i])

        ret = "Longest rhyme (l={0}): {1}\n".format(int(rl), ''.join(pieces))
        if not is_finnish:
            # Get the corresponding lines from the original lyrics
            # NOTE(review): assumes self.lines_orig has been populated for
            # every language other than 'fi' - confirm against the caller
            line_beg = self.line_idxs[p0]
            line_end = self.line_idxs[p2]
            for i in range(line_beg, line_end+1):
                if i < len(self.lines_orig):
                    ret += self.lines_orig[i] + '\n'
        return ret
예제 #8
0
    def get_rhyme_str(self, rhyme_tuple):
        '''
        Construct a string of a given rhyme tuple.

        Input:
            rhyme_tuple     (rl, wpos1, wpos2): rl is used as the rhyme length
                            in vowels, wpos1 and wpos2 as indices into
                            self.word_ends of the words ending each part.

        Returns '' when either word index is None. Otherwise the result
        starts with the line "Longest rhyme (l=<rl>): <text>", where <text>
        runs from the start of the line holding the first part to the end of
        the last word. For language 'fi' the characters at the indices
        returned by self.get_rhyming_vowels are capitalized. For any other
        language ' | ' is put before the smallest and '|' after the largest
        index of each group, and the matching lines of self.lines_orig
        follow, one per line.
        '''
        rl, wpos1, wpos2 = rhyme_tuple
        if wpos1 is None or wpos2 is None:
            return ''

        text = self.text
        text_end = len(text) - 1

        # Find the ending of the last word; stop at the final character so a
        # last word with no separator after it cannot raise IndexError
        p2 = self.vow_idxs[self.word_ends[wpos2]]
        while p2 < text_end and not ph.is_space(text[p2]):
            p2 += 1

        # Find the beginning of the line
        p0 = self.vow_idxs[self.word_ends[wpos1]-rl]
        while text[p0] != '\n' and p0 > 0:
            p0 -= 1

        # Presumably the character indices of the rhyming vowels of each
        # part; they are only compared against text positions here
        rw1, rw2 = self.get_rhyming_vowels(rhyme_tuple)
        finnish = self.language == 'fi'
        if not finnish:
            # Computed once instead of on every character
            first_marks = (min(rw1), min(rw2))
            last_marks = (max(rw1), max(rw2))

        chunks = []
        for i in range(p0, p2+1):
            ch = text[i]
            if finnish:
                if i in rw1 or i in rw2:
                    ch = ch.capitalize()
            elif i in first_marks:
                ch = ' | ' + ch
            elif i in last_marks:
                ch = ch + '|'
            chunks.append(ch)

        ret = "Longest rhyme (l=%d): %s\n" % (rl, ''.join(chunks))
        if not finnish:
            # Get the corresponding lines from the original lyrics
            # NOTE(review): assumes self.lines_orig has been populated for
            # every language other than 'fi' - confirm against the caller
            line_beg = self.line_idxs[p0]
            line_end = self.line_idxs[p2]
            for i in range(line_beg, line_end+1):
                if i < len(self.lines_orig):
                    ret += self.lines_orig[i] + '\n'
        return ret