Esempio n. 1
0
    def compute_vowel_representation(self):
        '''
        Build the vowel-only view of the lyrics stored in self.text.

        The following attributes are (re)initialised and filled:
            vow        -- the vowels of the text (after ph.map_vow), with
                          an immediately repeated vowel stored only once
            vow_idxs   -- for every entry of vow, its index in self.text
                          (for a repeated vowel: the later occurrence)
            word_ends  -- for every recorded word, the index in vow of its
                          last vowel
            words      -- the recorded words, cut right after their last
                          vowel
            line_idxs  -- for every character of self.text, its line index

        When the language code starts with 'en', self.text is first
        replaced by the output of ph.get_phonetic_transcription; the
        original text is kept in self.text_orig and its lines are stored
        in self.lines_orig.

        A word is only recorded when a separator (ph.is_space) follows it,
        so a final word without a trailing separator is not recorded.
        '''
        self.vow = []
        self.vow_idxs = []
        self.word_ends = []
        self.words = []
        self.line_idxs = []

        lang = self.language
        is_english = len(lang) >= 2 and lang[:2] == 'en'
        if is_english:
            # Keep the untouched lyrics and continue with the transcription
            self.text_orig = self.text
            self.text = ph.get_phonetic_transcription(
                self.text_orig, output_fname=self.filename + '.ipa')
            self.word_ends_orig = []
            self.words_orig = []

        text = self.text
        last_pos = len(text) - 1
        word_start = 0  # Index right after the previous separator
        current_line = 0  # Line index of the character being processed
        for pos, raw in enumerate(text):
            self.line_idxs.append(current_line)
            ch = ph.map_vow(raw, lang)

            if ph.is_vow(ch, lang):
                if pos > 0 and text[pos - 1] == ch:
                    # Double vowel: keep a single entry that points to the
                    # later occurrence (e.g. 'aa' in 'bath' should rhyme
                    # with the single 'a' in 'trap')
                    self.vow_idxs[-1] = pos
                else:
                    # TODO Diphthongs should not be split (i.e. "price"
                    # should not rhyme with "trap kit"). This has been
                    # fixed in BattleBot
                    self.vow.append(ch)
                    self.vow_idxs.append(pos)
                continue

            if not ph.is_space(ch):
                continue

            # A newline always starts a new line; sentence-ending
            # punctuation does so only when a newline does not follow
            # directly (which would otherwise count the break twice)
            if ch in '\n':
                current_line += 1
            elif ch in '.!?' and pos < last_pos and text[pos + 1] != '\n':
                current_line += 1

            # A separator directly after a non-separator closes a word
            if self.vow and not ph.is_space(text[pos - 1]):
                # Trailing consonants are dropped: the word is cut right
                # after the most recent vowel
                candidate = text[word_start:self.vow_idxs[-1] + 1]
                # Words without any vowel are not recorded
                if any(ph.is_vow(sym, lang) for sym in candidate):
                    self.word_ends.append(len(self.vow) - 1)
                    self.words.append(candidate)
            word_start = pos + 1

        if is_english:
            self.lines_orig = self.text_orig.split('\n')
Esempio n. 2
0
    def compute_vowel_representation(self):
        '''
        Build the vowel-only view of the lyrics stored in self.text.

        The following attributes are (re)initialised and filled:
            vow        -- the vowels of the text (after ph.map_vow), with
                          an immediately repeated vowel stored only once
            vow_idxs   -- for every entry of vow, its index in self.text
                          (for a repeated vowel: the later occurrence)
            word_ends  -- for every recorded word, the index in vow of its
                          last vowel
            words      -- the recorded words, cut right after their last
                          vowel
            line_idxs  -- for every character of self.text, its line index

        When the language code starts with 'en', self.text is first
        replaced by the output of ph.get_phonetic_transcription; the
        original text is kept in self.text_orig and its lines are stored
        in self.lines_orig.

        A word is only recorded when a separator (ph.is_space) follows it,
        so a final word without a trailing separator is not recorded.
        '''
        self.vow = []
        self.vow_idxs = []
        self.word_ends = []
        self.words = []
        self.line_idxs = []

        lang = self.language
        is_english = len(lang) >= 2 and lang[:2] == 'en'
        if is_english:
            # Keep the untouched lyrics and continue with the transcription
            self.text_orig = self.text
            self.text = ph.get_phonetic_transcription(
                self.text_orig, output_fname=self.filename + '.ipa')
            self.word_ends_orig = []
            self.words_orig = []

        text = self.text
        last_pos = len(text) - 1
        word_start = 0  # Index right after the previous separator
        current_line = 0  # Line index of the character being processed
        for pos, raw in enumerate(text):
            self.line_idxs.append(current_line)
            ch = ph.map_vow(raw, lang)

            if ph.is_vow(ch, lang):
                if pos > 0 and text[pos - 1] == ch:
                    # Double vowel: keep a single entry that points to the
                    # later occurrence (e.g. 'aa' in 'bath' should rhyme
                    # with the single 'a' in 'trap')
                    self.vow_idxs[-1] = pos
                else:
                    # TODO Diphthongs should not be split (i.e. "price"
                    # should not rhyme with "trap kit"). This has been
                    # fixed in BattleBot
                    self.vow.append(ch)
                    self.vow_idxs.append(pos)
                continue

            if not ph.is_space(ch):
                continue

            # A newline always starts a new line; sentence-ending
            # punctuation does so only when a newline does not follow
            # directly (which would otherwise count the break twice)
            if ch in '\n':
                current_line += 1
            elif ch in '.!?' and pos < last_pos and text[pos + 1] != '\n':
                current_line += 1

            # A separator directly after a non-separator closes a word
            if self.vow and not ph.is_space(text[pos - 1]):
                # Trailing consonants are dropped: the word is cut right
                # after the most recent vowel
                candidate = text[word_start:self.vow_idxs[-1] + 1]
                # Words without any vowel are not recorded
                if any(ph.is_vow(sym, lang) for sym in candidate):
                    self.word_ends.append(len(self.vow) - 1)
                    self.words.append(candidate)
            word_start = pos + 1

        if is_english:
            self.lines_orig = self.text_orig.split('\n')
Esempio n. 3
0
    def get_rhyming_vowels(self, rhyme_tuple):
        '''
        Return the indices of the rhyming vowels of the longest rhyme.

        Input:
            rhyme_tuple -- (rl, wpos1, wpos2): the rhyme length in vowels
                           and the positions (indices into self.word_ends)
                           of the two words that end the rhyming parts.

        Output:
            Tuple with the indices of the first part and the second part of
            the rhyme separately. Each part lists indices into self.text,
            starting from the last vowel of the word and moving backwards.
            ([-1], [-1]) is returned when either word position is None.
        '''
        rl, wpos1, wpos2 = rhyme_tuple
        if wpos1 is None or wpos2 is None:
            return ([-1], [-1])

        text = self.text
        text_len = len(text)

        def scan_back(start, always_count_start):
            # Walk backwards from `start`, collecting vowel indices until
            # `rl` vowels have been counted.
            idxs = []
            n_caps = 0
            p = start
            # Stop at the beginning of the text: without the lower bound a
            # too large `rl` would let p go negative and silently wrap
            # around to the end of the text.
            while n_caps < rl and p >= 0:
                if ph.is_vow(text[p], self.language):
                    idxs.append(p)
                    # The first half of a double vowel is collected but
                    # not counted. Nothing follows the very last character,
                    # so it can never be the first half of a double vowel.
                    is_double = p + 1 < text_len and text[p] == text[p + 1]
                    if (always_count_start and p == start) or not is_double:
                        n_caps += 1
                p -= 1
            return idxs

        # The first part of the rhyme
        rhyme_idxs1 = scan_back(self.vow_idxs[self.word_ends[wpos1]], False)
        # The second part of the rhyme; its last vowel must always be
        # counted
        rhyme_idxs2 = scan_back(self.vow_idxs[self.word_ends[wpos2]], True)

        return (rhyme_idxs1, rhyme_idxs2)
Esempio n. 4
0
    def get_rhyming_vowels(self, rhyme_tuple):
        '''
        Return the indices of the rhyming vowels of the longest rhyme.

        Input:
            rhyme_tuple -- (rl, wpos1, wpos2): the rhyme length in vowels
                           and the positions (indices into self.word_ends)
                           of the two words that end the rhyming parts.

        Output:
            Tuple with the indices of the first part and the second part of
            the rhyme separately. Each part lists indices into self.text,
            starting from the last vowel of the word and moving backwards.
            ([-1], [-1]) is returned when either word position is None.
        '''
        rl, wpos1, wpos2 = rhyme_tuple
        if wpos1 is None or wpos2 is None:
            return ([-1],[-1])

        text = self.text
        text_len = len(text)

        def scan_back(start, always_count_start):
            # Walk backwards from `start`, collecting vowel indices until
            # `rl` vowels have been counted.
            idxs = []
            n_caps = 0
            p = start
            # Stop at the beginning of the text: without the lower bound a
            # too large `rl` would let p go negative and silently wrap
            # around to the end of the text.
            while n_caps < rl and p >= 0:
                if ph.is_vow(text[p], self.language):
                    idxs.append(p)
                    # The first half of a double vowel is collected but
                    # not counted. Nothing follows the very last character,
                    # so it can never be the first half of a double vowel.
                    is_double = p+1 < text_len and text[p] == text[p+1]
                    if (always_count_start and p == start) or not is_double:
                        n_caps += 1
                p -= 1
            return idxs

        # The first part of the rhyme
        rhyme_idxs1 = scan_back(self.vow_idxs[self.word_ends[wpos1]], False)
        # The second part of the rhyme; its last vowel must always be
        # counted
        rhyme_idxs2 = scan_back(self.vow_idxs[self.word_ends[wpos2]], True)

        return (rhyme_idxs1, rhyme_idxs2)
Esempio n. 5
0
    def Get_vowel(self, rhyme_list, language='en-g'):
        '''
        Extract the vowels of every entry in rhyme_list.

        Input:
            rhyme_list -- sequence of strings (or other sequences of
                          characters) to scan.
            language   -- language code passed to ph.map_vow and
                          ph.is_vow. Defaults to 'en-g', the value that
                          was previously hard-coded.

        Output:
            Tuple (vowel_list, vowel_last_index, all_vowel) where
            vowel_list[k] is the string of mapped vowels of entry k,
            vowel_last_index[k] is the position of the last vowel inside
            entry k (-1 when the entry has no vowel), and all_vowel is the
            flat list of all mapped vowels in order of appearance.
        '''
        vowel_list = []
        vowel_last_index = []
        all_vowel = []
        for entry in rhyme_list:
            entry_vowels = []
            last_index = -1
            for j, raw in enumerate(entry):
                # Vowels are tested after mapping, and the mapped form is
                # what gets stored
                c = ph.map_vow(raw, language)
                if ph.is_vow(c, language):
                    entry_vowels.append(c)
                    last_index = j
            vowel_list.append(''.join(entry_vowels))
            vowel_last_index.append(last_index)
            all_vowel.extend(entry_vowels)
        return vowel_list, vowel_last_index, all_vowel
Esempio n. 6
0
    def find_rhymes(self, rhyme_str):
        '''
        Find the words that contain any vowel of rhyme_str.

        Input:
            rhyme_str -- string that must occur in self.text.

        Output:
            '' (empty string) when rhyme_str does not occur in self.text;
            this return value is kept for backward compatibility even
            though it differs from the normal result. Otherwise a tuple
            (words, word_pos): the matching entries of self.words and
            their indices in self.words. Results are grouped by vowel, in
            the order the vowels first appear in rhyme_str, so a word that
            contains several of the vowels is listed once per vowel.
        '''
        if rhyme_str not in self.text:
            return ''

        # Collect the distinct vowels in first-occurrence order. A plain
        # set would make the order of the results depend on string hash
        # randomization and thus vary between interpreter runs.
        vows = []
        seen = set()
        for ch in rhyme_str:
            # NOTE(review): is_vow is called without a language argument
            # here, unlike other callers -- confirm it has a default
            if ch not in seen and ph.is_vow(ch):
                seen.add(ch)
                vows.append(ch)

        word_pos = []
        words = []
        for vow in vows:
            for i, word in enumerate(self.words):
                if vow in word:
                    word_pos.append(i)
                    words.append(word)

        return words, word_pos