def mutate_limerick(limerick):
    """Swap one random word of one random limerick line for a random new word.

    A line number from 1 to 5 is drawn, one whitespace-separated word of that
    line is replaced by a word sampled from ``words.words()`` that has at
    least one pronunciation according to ``pronouncing``, and the new line is
    written back through the matching ``set_line_N`` method.

    :param limerick: object exposing string attributes ``line_1`` ..
        ``line_5`` and setter methods ``set_line_1`` .. ``set_line_5``
    :return: None; the limerick is modified in place. A line without any
        words is left untouched. Runs of whitespace in the mutated line are
        collapsed to single spaces.
    """
    line_number = randint(1, 5)
    line = getattr(limerick, "line_{}".format(line_number))

    # Work on whole words. Indexing the string directly would select a single
    # character, and str.replace would then rewrite every occurrence of it.
    tokens = line.split()
    if not tokens:
        return
    position = randint(0, len(tokens) - 1)

    # Fetch the vocabulary once instead of on every retry.
    vocabulary = words.words()
    new_word = sample(vocabulary, 1)[0]
    # Keep drawing until the word has a known pronunciation.
    while not pronouncing.phones_for_word(new_word):
        new_word = sample(vocabulary, 1)[0]

    tokens[position] = new_word
    getattr(limerick, "set_line_{}".format(line_number))(" ".join(tokens))
Example #2
0
def generate_line(syllable_number, rhyme_word, word_list):
    """Build a line with ``syllable_number`` syllables that ends in ``rhyme_word``.

    Random words from ``word_list`` are prepended to the rhyme word until the
    running syllable total equals the target. When the total overshoots, the
    line is reset to the bare rhyme word and the search starts again.

    :param syllable_number: target syllable count of the finished line
    :param rhyme_word: word placed at the end of the line; it needs at least
        one pronunciation, otherwise ``random.choice`` raises ``IndexError``
    :param word_list: sequence of candidate words (trailing whitespace is
        stripped from each pick)
    :return: the line as a space-separated string. Once more than 100000
        iterations have passed, an overshooting line is returned as it is
        instead of being reset.
    """
    rhyme_phones = random.choice(pronouncing.phones_for_word(rhyme_word))
    rhyme_syllabi_count = pronouncing.syllable_count(rhyme_phones)
    line_syllabi_count = rhyme_syllabi_count
    line = rhyme_word
    timeout_counter = 0

    while line_syllabi_count != syllable_number:
        if line_syllabi_count > syllable_number:
            if timeout_counter > 100000:
                return line
            # Overshot the target: start over from the rhyme word alone.
            line_syllabi_count = rhyme_syllabi_count
            line = rhyme_word
        else:
            word = random.choice(word_list).rstrip()
            word_phones = pronouncing.phones_for_word(word)
            if word_phones:
                # Count the syllables of the word just drawn (its first
                # pronunciation), not those of the rhyme word.
                line_syllabi_count += pronouncing.syllable_count(word_phones[0])
                line = word + " " + line

        timeout_counter += 1
    return line
 def test_phones_for_word(self):
     # A dictionary word comes back with all of its listed pronunciations.
     known = pronouncing.phones_for_word("conflicts")
     self.assertEqual(len(known), 4)
     self.assertEqual(known[0], "K AH0 N F L IH1 K T S")
     # A keyboard mash is presumably absent from the dictionary.
     unknown = pronouncing.phones_for_word("asdfasdfasdf")
     self.assertEqual(unknown, [])
Example #4
0
def findLineStress(tokenized_line):
    '''
    Find the possible accentual stress patterns of a tokenized line, based on
    the CMU dictionary. Uses relative stress per word, so somewhat limited.

    Words without a dictionary pronunciation are skipped and contribute
    nothing to the patterns.

    Parameters
    ----------
    tokenized_line : list
        list of tokens from line, usually preprocessed to remove non-words

    Returns
    -------
    parselist: list of distinct potential stress strings, in no particular
        order. 0 is unstressed, 1 is primary stress, 2 is secondary stress
        (middle). A line with no known words yields [''].
    '''

    parses = ['']
    for word in tokenized_line:
        pronunciations = pronouncing.phones_for_word(word)
        if not pronunciations:
            continue
        # Only distinct stress patterns matter; several pronunciations of a
        # word often share one.
        patterns = {pronouncing.stresses(p) for p in pronunciations}
        # De-duplicate after every word so the candidate list cannot grow
        # with repeated copies of the same parse.
        parses = list({prefix + pattern
                       for pattern in patterns
                       for prefix in parses})

    return list(set(parses))
Example #5
0
def split_spellings(sentence, full_pronounciation_output=False):
    """Yield a pronunciation for every word of ``sentence``.

    The sentence is split on whitespace and each token is passed through
    ``string_cleaner``; tokens that clean to an empty string are skipped.
    All-digit tokens are spelled out with ``inflect_engine.number_to_words``
    and each resulting word is looked up separately.

    :param sentence: text to process
    :param full_pronounciation_output: when False (default) yield only the
        first pronunciation string of each word; when True yield the whole
        list returned by ``pronouncing.phones_for_word``
    :raises IndexError: during iteration, in first-pronunciation mode, when a
        word has no pronunciation
    """
    word_array = tokenize.WhitespaceTokenizer().tokenize(sentence)
    print(word_array)
    for word in word_array:
        word = string_cleaner(word)
        if word == "":
            continue
        if word.isdigit():
            numword = string_cleaner(inflect_engine.number_to_words(word))
            print(numword)
            # Always split: iterating an unsplit one-word string such as
            # "five" would walk through its characters instead.
            elements = numword.split()
        else:
            elements = [word]
        for element in elements:
            output = pronouncing.phones_for_word(element)
            if full_pronounciation_output:
                yield output
            else:
                yield output[0]
 def test_phones_for_word(self):
     # "conflicts" is expected to have four pronunciations, in this order.
     listed = pronouncing.phones_for_word("conflicts")
     self.assertEqual(len(listed), 4)
     self.assertEqual(listed[0], "K AH0 N F L IH1 K T S")
     # Nonsense input (presumably not a dictionary entry) gives an empty list.
     missing = pronouncing.phones_for_word("asdfasdfasdf")
     self.assertEqual(missing, [])
Example #7
0
def fallback_get_phonemes(name, lang="en"):
    """Return a phoneme string for ``name``, or None if none can be produced.

    Multi-word names are resolved word by word and joined with " . ".
    Single words use the first dictionary pronunciation when there is one,
    otherwise the space-joined result of ``fallback_guess_phonemes``.

    :param name: word or space-separated phrase (lower-cased before lookup)
    :param lang: language code; only codes starting with "en" are accepted
    :return: phoneme string, or None when a word has neither a dictionary
        entry nor a guess
    :raises ValueError: if ``lang`` does not start with "en"
    """
    if not lang.startswith("en"):
        # Raise the error; returning the exception object would hand callers
        # a truthy value that looks like a result.
        raise ValueError("Unsupported language")
    name = name.lower()

    if " " in name:
        parts = []
        for part in name.split(" "):
            phon = fallback_get_phonemes(part, lang)
            if phon is None:
                # One unresolvable word makes the whole phrase unresolvable.
                return None
            parts.append(phon)
        return " . ".join(parts)

    known = pronouncing.phones_for_word(name)
    if known:
        return known[0]

    guess = fallback_guess_phonemes(name)
    if guess is not None:
        return " ".join(guess)
    return None
Example #8
0
def findLineStress(line):
    '''find accentual stress of a given line, based on CMU dict.  Still a bit unclever.

    The line is first passed through removeMarkupWords and prepString, then
    split on whitespace. Words without a dictionary pronunciation are skipped.

    _parameters_
    line: line of text

    _returns_
    parselist: list of distinct potential stresses after parsing, in no particular order. 0 is unstressed, 1 is primary stress, 2 is secondary stress (middle)
    syllableLengths: list of syllable lengths corresponding to the parses in parselist
    wordCount: count of words in the line
    '''
    line = prepString(removeMarkupWords(line))
    words = line.split()
    wordCount = len(words)
    parses = ['']
    for word in words:
        pros = pronouncing.phones_for_word(word)
        if not pros:
            continue
        # Only distinct stress patterns matter; several pronunciations of a
        # word often share one.
        patterns = {pronouncing.stresses(p) for p in pros}
        # De-duplicate after every word so the candidate list cannot grow
        # with repeated copies of the same parse.
        parses = list({prefix + pattern
                       for pattern in patterns
                       for prefix in parses})

    # parses is already duplicate-free; build the lengths from the same list
    # so both results are guaranteed to line up.
    return parses, [len(parse) for parse in parses], wordCount
Example #9
0
def syllable_count(sentence):
    """Return the total syllable count of the words in ``sentence``.

    Each whitespace-separated word is counted via its first dictionary
    pronunciation; a word without one is counted from an empty phone string.
    """
    total = 0
    for token in sentence.split():
        candidates = pronouncing.phones_for_word(token.strip())
        first = "" if candidates == [] else candidates[0]
        total += pronouncing.syllable_count(first)
    return total
def _length_normalised_ratio(old_text, new_text):
    """Return (shorter length - edit distance) / shorter length, or -1000 if either text is empty."""
    shortest = min(len(old_text), len(new_text))
    if shortest == 0:
        # Nothing to normalise by; use the same "no match" marker as the
        # missing-pronunciation case instead of dividing by zero.
        return -1000
    return (shortest - Levenshtein.distance(old_text, new_text)) / shortest


def leven_distance(old_word, new_word):
    """Score how similar two words are, by spelling and by pronunciation.

    Three ratios are computed, each as (shorter length - Levenshtein
    distance) / shorter length:

    1. on the spellings,
    2. on the first pronunciation strings (phones separated by spaces),
    3. on the same pronunciation strings with the spaces removed.

    Ratios 2 and 3 are only available when both words have a pronunciation;
    otherwise they are set to -1000 so they can never win the maximum.

    :param old_word: first word
    :param new_word: second word
    :return: the largest of the three ratios if it exceeds 0.3, else 0.0001
    """
    # 1st technique: spelling. The shorter of the TWO words is the
    # normaliser (the original compared old_word's length with itself).
    ratio_ch = _length_normalised_ratio(old_word, new_word)

    old_phones = pp.phones_for_word(old_word)
    new_phones = pp.phones_for_word(new_word)
    if old_phones and new_phones:
        old_word_phs = old_phones[0]
        new_word_phs = new_phones[0]

        # 2nd technique: phonetic representation including spaces.
        ratio_phs = _length_normalised_ratio(old_word_phs, new_word_phs)

        # 3rd technique: phonetic representation without spaces.
        ratio_ph = _length_normalised_ratio(old_word_phs.replace(" ", ""),
                                            new_word_phs.replace(" ", ""))
    else:
        # Large negative marker so only the spelling ratio can count.
        ratio_ph = -1000
        ratio_phs = -1000

    ratio = max(ratio_ch, ratio_ph, ratio_phs)

    # Anything at or below the threshold collapses to a tiny positive score.
    if ratio > 0.3:
        return ratio
    return 0.0001
Example #11
0
def syllable_counter(lines):
    '''
    Count every syllable in a list of strings.

    NOTE: Multi-syllabic digits, times (i.e. 1:03) and probably other
    non-"word" words are not handled specially.


    Input
    -----
    lines : list (str)
        List of strings to count.


    Output
    ------
    int
        Total number of syllables across all strings in the input list.


    [Modified from Allison Parrish's example in the documentation
     for her library, pronouncing]:
    https://pronouncing.readthedocs.io/en/latest/tutorial.html

    '''
    grand_total = 0

    for line in lines:
        for word in line.split():
            known = pronouncing.phones_for_word(word)
            # A word missing from the dictionary still counts once: the
            # phoneme for 'I' stands in as a one-syllable placeholder.
            phoneme = known[0] if known else 'AY1'
            grand_total += pronouncing.syllable_count(phoneme)

    return grand_total
Example #12
0
def pronunciationSimilarity(str1, str2):
    '''
    Finds the relative edit distance of the phones of two strings.

    Both strings are lower-cased and looked up in the pronouncing dictionary;
    the phones of their first pronunciations are compared with editSimilarity.

    Returns the editSimilarity result, or 1 when either word has no
    pronunciation or the comparison fails with an ordinary exception.
    '''
    try:
        pronunciation1 = pronouncing.phones_for_word(str1.lower())
        pronunciation2 = pronouncing.phones_for_word(str2.lower())

        if not pronunciation1 or not pronunciation2:
            # At least one word is not in the dictionary.
            return 1

        phones1 = pronunciation1[0].split()
        phones2 = pronunciation2[0].split()

        return editSimilarity(phones1, phones2)
    except Exception:
        # Best-effort fallback kept from the original, but no longer swallows
        # KeyboardInterrupt / SystemExit the way a bare except does.
        return 1
Example #13
0
def rhymes_all_words(word, mc):
    """
    Look for all words that rhyme with word in our markov chain.

    The rhyming part is taken from the first pronunciation of ``word``: it
    starts at the last phone carrying a stress digit and runs to the end. A
    key of ``mc`` rhymes when one of its pronunciations is longer than that
    part and ends with the corresponding tail of any pronunciation of
    ``word``. For hyphenated input only the part after the last hyphen is used.

    :param word: string
    :param mc: dict whose keys are the candidate words
    :return: rhyme_list: list. NOTE: when ``word`` has no pronunciation the
        string 'a' is returned instead (kept for existing callers).

    Examples
    --------
    >>>rhymes_all_words('people', markov_chain)
    ['adorable', 'example', 'little', 'professional', 'people', 'sentimental', 'mutual'... ]

    >>>rhymes_all_words('5-foot', markov_chain)
    ['put', 'underfoot', 'foot']
    """
    if "-" in word:
        word = word.split("-")[-1]

    # Keep only keys that look like words. Raw string so the regex escapes
    # are not treated as (invalid) string escapes.
    regex = re.compile(r"^([A-Z])\w+([a-zA-Z]+[-'][a-zA-Z]+)|([a-zA-Z]+\.)|([a-zA-Z])+$")
    words = [w for w in mc.keys() if regex.match(w)]

    pronunciations = pronouncing.phones_for_word(word)
    if not pronunciations:
        print('no pron ' + word)
        return 'a'
    word_pron = pronunciations[0].split()

    # Position of the last phone that carries a stress digit (a vowel).
    # Scan by position: list.index() would return the FIRST occurrence of
    # that phone, which is wrong when the same vowel appears twice.
    index = -1
    for position in range(len(word_pron) - 1, -1, -1):
        if not word_pron[position].isalpha():
            index = position
            break

    # Negative slice offset selecting the rhyming tail of a pronunciation.
    tail_start = index - len(word_pron)

    # Tails of every pronunciation of the word that must be matched.
    word_prons = [wp.split()[tail_start:] for wp in pronunciations]

    rhyme_list = []
    for w in words:
        for w_pron in pronouncing.phones_for_word(w):
            w_pron = w_pron.split()
            if len(w_pron) > -tail_start and w_pron[tail_start:] in word_prons:
                rhyme_list.append(w)
                break
    return rhyme_list
Example #14
0
def countable_corpus(corpus):
    '''
    Function to convert a corpus to a Markov dictionary where all of the
    following words are "countable", i.e. `phones_for_word` returns a
    pronunciation for them and thus their syllables and phonemes can be
    counted and utilized.

    This is necessary for generators that have syllabic constraints.


    Input
    -----
    corpus : sequence (str)
        The corpus as a sequence of words. Each element is paired with its
        successor, so a plain string would be processed character by
        character.


    Output
    ------
    text_dictionary : dict [str, list (str)]
        Markov dictionary with each word of the corpus (except the last) as
        keys, with each value being a list of (countable) words that follow
        that key. A follower that is not countable is replaced by a random
        countable word from the corpus; if the corpus contains no countable
        word at all, nothing is recorded for that pair.

    '''

    # instantiate a dictionary
    text_dictionary = defaultdict(list)

    # countable words of the corpus, built on first use; duplicates are kept
    # so frequent words stay proportionally more likely to be drawn
    fallback_pool = None

    # iterate over each word and its subsequent word
    for current_word, next_word in zip(corpus, corpus[1:]):

        # append word to list as long as its phonemes can be counted
        if phones_for_word(next_word):
            text_dictionary[current_word].append(next_word)
            continue

        # otherwise choose a random word from the corpus whose phonemes can
        # NOTE: introduces some randomness! preventing assured reproducibility
        if fallback_pool is None:
            fallback_pool = [w for w in corpus if phones_for_word(w)]
        # drawing from the pre-filtered pool cannot spin forever when no
        # word in the corpus is countable
        if fallback_pool:
            text_dictionary[current_word].append(random.choice(fallback_pool))

    return text_dictionary
Example #15
0
def get_rhyming_groups(group_size, number_groups, pool):
    """Returns a list of rhyming groups of the given size from the given candidate pool.

    Sentences are popped from ``pool`` (which is consumed in the process) and
    clustered by the rhyming part of their last word until enough clusters
    have reached ``group_size``. A sentence is skipped when its last word has
    no known pronunciation or already ends a sentence in the same cluster.

    Args:
        group_size (int): number of lines in the rhyming group.
        number_groups (int): number of rhyming groups.
        pool (list or set) : candidate pool from which to draw lines.

    Returns:
        list: randomly ordered groups, each a random sample of ``group_size``
        sentences from one cluster.

    Raises:
         InsufficientSentencesError: if the candidate pool is not rich enough.
    """
    clusters = defaultdict(list)

    def complete_groups():
        return [c for c in clusters.values() if len(c) >= group_size]

    while len(complete_groups()) < number_groups:
        try:
            sentence = pool.pop()
        except (KeyError, IndexError):
            # set.pop() raises KeyError, list.pop() raises IndexError.
            raise InsufficientSentencesError(
                'Candidate pool is not rich enough!')
        last_word = sentence.split(" ")[-1]
        pronunciations = pronouncing.phones_for_word(last_word)
        if not pronunciations:
            # Cannot determine a rhyme for this sentence; try the next one.
            continue
        rhyming_part = pronouncing.rhyming_part(pronunciations[0])
        if last_word not in [s.split(" ")[-1] for s in clusters[rhyming_part]]:
            clusters[rhyming_part].append(sentence)

    groups = complete_groups()
    random.shuffle(groups)
    return [random.sample(group, group_size) for group in groups]
Example #16
0
def last_vowel_phones(line, num_phones):
    """Return the last ``num_phones`` vowel phones of the line's ending word.

    The ending word comes from ``ending_word(line)`` and its first
    pronunciation is used.

    :param line: line of text
    :param num_phones: how many trailing vowel phones to return
    :return: list of vowel phones, last vowel first; or 0 when the word has
        no pronunciation or fewer than ``num_phones`` matching vowels
    """
    last_word = ending_word(line)
    # Vowel phones with stress 0 or 1.
    # NOTE(review): secondary-stress vowels (suffix 2) are not listed and are
    # therefore ignored - confirm this is intended.
    vowels = {
        'AA1', 'AE1', 'AH1', 'AO1', 'AW1', 'AY1', 'EH1', 'ER1', 'EY1', 'IH1',
        'IY1', 'OW1', 'OY1', 'UH1', 'UW1', 'AA0', 'AE0', 'AH0', 'AO0', 'AW0',
        'AY0', 'EH0', 'ER0', 'EY0', 'IH0', 'IY0', 'OW0', 'OY0', 'UH0', 'UW0'
    }

    phones = pronouncing.phones_for_word(last_word)
    if not phones:
        return 0

    vowel_phones = [phone for phone in phones[0].split() if phone in vowels]
    if len(vowel_phones) < num_phones:
        # Not enough vowels. The original appended vowel_phones[-1] to a list
        # it then discarded, which crashed when no vowel had been found.
        return 0
    return [vowel_phones[-i] for i in range(1, num_phones + 1)]
Example #17
0
    def line_rhymescheme(self, line):
        """
        Creates rhyme scheme for a given line.

        The last word of the line is stripped of non-word characters and
        lower-cased. For each of its pronunciations the last two phones are
        expanded through ``self.sound_dict`` into candidate sound pairs; every
        candidate is scored by how many dictionary words end in it, and the
        best-scoring pair wins.

        :param line: line of lyrics, str
        :return: the winning sound pair joined by a space (the second sound
            is empty for one-phone pronunciations), or the last two letters
            of the word when it has no pronunciation, str
        """
        end_word = re.sub(r"\W+", '', get_last_word(line)).lower()
        pronunciation_list = pronouncing.phones_for_word(end_word)
        if not pronunciation_list:
            return end_word[-2:]

        sound_pairs = []
        for item in pronunciation_list:
            sound_pair = item.split(' ')[-2:]
            if len(sound_pair) < 2:
                for sound1 in self.sound_dict[sound_pair[0]]:
                    sound_pairs.append((sound1, ''))
            else:
                for sound1 in self.sound_dict[sound_pair[0]]:
                    for sound2 in self.sound_dict[sound_pair[1]]:
                        sound_pairs.append((sound1, sound2))

        # Each occurrence of a pair adds its hit count, but the dictionary
        # search itself is only run once per distinct pair.
        search_hits = {}
        potential_rhymes = {}
        for sound_pair in sound_pairs:
            if sound_pair not in search_hits:
                if sound_pair[1] == '':
                    pattern = sound_pair[0] + "$"
                else:
                    pattern = sound_pair[0] + " " + sound_pair[1] + "$"
                search_hits[sound_pair] = len(pronouncing.search(pattern))
            potential_rhymes[sound_pair] = (
                potential_rhymes.get(sound_pair, 0) + search_hits[sound_pair])

        most_freq_pair = max(potential_rhymes.items(), key=operator.itemgetter(1))[0]
        return most_freq_pair[0] + ' ' + most_freq_pair[1]
def test():
    """Interactive console loop that prints word statistics.

    Prompts for words until '0' is entered; empty input is ignored. For every
    other word it prints the keys and values of ``cfd[word]``, five random
    following words, its pronunciations, syllable count, rhymes and stresses.
    """
    # (heading, lookup) pairs printed in order for every entered word.
    sections = (
        ("Random 5 words following", lambda w: random_word_generator(w, 5)),
        ("Pronunciations of", lambda w: pronouncing.phones_for_word(w)),
        ("Syllables in", lambda w: count_syllables(w)),
        ("Rhymes for", lambda w: get_rhymes(w)),
        ("Stresses for", lambda w: get_stresses(w)),
    )
    while True:
        word = input("Please enter a word (Enter '0' to quit): ")
        if word == '0':
            break
        if word == "":
            continue
        print(cfd[word].keys(), cfd[word].values())
        print()
        for heading, lookup in sections:
            print(heading, word)
            print(lookup(word))
            print()
Example #19
0
def is_iambic(phrase):
    """
    Check that the phrase satisfies iambic meter.

    Return 1 if so, otherwise 0. Definitely an imperfect check: only the
    first pronunciation of each word is considered, and a one-syllable word
    with primary stress is treated as ambiguous (stress 2) so it fits either
    position. Syllables at even positions (0, 2, ...) must not carry stress 1
    and syllables at odd positions must not carry stress 0.

    If a word is not in the CMU dictionary, return 0. A phrase without any
    words returns 1.
    """
    meter = ''
    for word in phrase.split():
        word = word.strip().strip(string.punctuation).lower()
        phones_list = pronouncing.phones_for_word(word)
        if not phones_list:
            return 0  # word not found
        # just default to the first pronunciation if > 1 given
        stresses = pronouncing.stresses(phones_list[0])
        if stresses == '1':
            stresses = '2'  # allow ambiguity for 1-syllable words with stress 1
        meter += stresses

    for position, stress in enumerate(meter):
        if position % 2 == 0:
            if stress == '1':
                return 0
        elif stress == '0':
            return 0
    return 1
Example #20
0
    def _new_sentence2(self, syls):
        """Generate sentences until one has exactly ``syls`` syllables.

        Each attempt asks ``self.text_model`` for a short sentence, drops its
        last character and counts syllables from the first pronunciation of
        every word; a sentence containing an unknown word counts as zero
        syllables. The current sentence and the syllable difference are
        printed before every attempt. Returns the accepted sentence with all
        punctuation characters removed.
        """
        syls = int(syls)
        sent = None
        phones = []
        while True:
            total = sum(pnc.syllable_count(p) for p in phones)
            if sent is not None and total == syls:
                break
            print(sent)
            print(total - syls)
            sent = self.text_model.make_short_sentence(
                syls * self.config.poem_avg_char_per_syl,
                tries=100,
                max_overlap_ratio=self.config.markovify_max_overlap_ratio,
                max_overlap_total=self.config.markovify_max_overlap_total)
            if sent is None:
                continue

            # Drop the final character before splitting into words.
            trimmed = sent[0:-1]
            try:
                phones = [pnc.phones_for_word(token)[0]
                          for token in trimmed.split()]
            except IndexError:
                # Word not found in dictionary
                phones = []

        return ''.join(c for c in sent if c not in string.punctuation)
Example #21
0
def phones_for_closest_match(word):
    """Brute-force search of ``pronouncing.pronunciations`` for the entry
    closest to ``word``.

    Entries are ranked by edit distance (reduced by one each for a matching
    first and last letter), then by difference in length, then
    alphabetically. Returns the best entry together with its phones.

    """
    def rank(candidate):
        # levenshtein distance, with a bonus for the same first / last letter
        score = editdistance.eval(candidate, word)
        if candidate.startswith(word[0]):
            score -= 1
        if candidate.endswith(word[-1]):
            score -= 1
        # length difference and the entry itself break ties
        return (score, abs(len(candidate) - len(word)), candidate)

    _, _, suggestion = min(rank(entry) for entry in pronouncing.pronunciations)

    # return the suggestion and the phones for the suggestion
    return suggestion, pronouncing.phones_for_word(suggestion)
Example #22
0
def phones_for_word(word):
    """Return ``(word, phones)`` for a word, falling back when it is not in
    the CMU dictionary.

    An empty word gives a single blank phone string. Unknown hyphenated words
    are resolved piece by piece and joined back up; any other unknown word is
    replaced by its closest dictionary match.

    """
    if not word:
        # blank phone string for a blank word
        return word, ['']

    found = pronouncing.phones_for_word(word)
    if found:
        return word, found

    if "-" not in word:
        return phones_for_closest_match(word)

    # look up each hyphen-separated piece on its own, keeping only the first
    # pronunciation of each
    resolved = []
    for piece in word.split('-'):
        suggested, piece_phones = phones_for_word(piece)
        resolved.append((suggested, piece_phones[0]))
    joined_phones = ' '.join(p for (w, p) in resolved if w)
    joined_word = '-'.join(w for (w, p) in resolved)
    return joined_word, [joined_phones]
Example #23
0
def search_match_by_regex():
    """Print demo results of ``pronouncing.search``.

    Prints the first pronunciation of "jest", the first five words whose
    pronunciation matches it, and every word matching "IH1 D AH0 L$".
    """
    # print() with a single argument behaves the same on Python 2 and 3.
    phones_jest = pronouncing.phones_for_word("jest")[0]
    print(phones_jest)
    print(pronouncing.search(phones_jest)[:5])
    # finds all of the words that end in -iddle
    phone_iddle = pronouncing.search("IH1 D AH0 L$")
    print(phone_iddle)
Example #24
0
 def pick_rhyme_for_word(self, word, forbidden_pronunciations=(), line=True):
     """Pick a random rhyme for ``word`` from ``self.by_rhyming_part``.

     A pronunciation of the word is chosen at random and its rhyming part is
     looked up in ``self.by_rhyming_part``. If that yields no rhyme word,
     another pronunciation is tried until none are left.

     :param word: word to rhyme with
     :param forbidden_pronunciations: pronunciations that must not be used
     :param line: when True return a random line stored for the chosen rhyme
         word, otherwise return the rhyme word itself
     :return: a line or word as described above; ``word`` itself when no
         usable pronunciation or rhyme exists
     """
     # Build a private, filtered copy. Removing entries from the list that
     # pronouncing.phones_for_word returns may alter the library's own lookup
     # data (it does in pronouncing's published implementation), which would
     # make later lookups of the same word come back incomplete.
     candidates = [p for p in pronouncing.phones_for_word(word)
                   if p not in forbidden_pronunciations]
     while candidates:
         # choose a pronunciation of the word at random and extract the rhyme phonemes
         chosen_pronunciation = random.choice(candidates)
         rhyming_part = pronouncing.rhyming_part(chosen_pronunciation)
         # consider the other words which rhyme with these phonemes
         various_rhymes = self.by_rhyming_part[rhyming_part]
         rhyme_words = list(various_rhymes.keys())
         if len(rhyme_words) > 1 and word in rhyme_words:
             # don't rhyme it with itself, unless it is the only option
             rhyme_words.remove(word)
         if not rhyme_words:
             # sadness. try another pronunciation
             candidates.remove(chosen_pronunciation)
             continue
         rhyme_word = random.choice(rhyme_words)
         if line:
             # return a whole line
             return random.choice(various_rhymes[rhyme_word])
         # just return a word
         return rhyme_word
     # if we don't have any pronunciations... just return the word
     return word
Example #25
0
def phones_for_closest_match(word):
    """Find the entry of ``pronouncing.pronunciations`` nearest to ``word``
    by exhaustive comparison.

    Ranking is by edit distance (one point off each for sharing the first
    letter and the last letter), then by length difference, then
    alphabetically. Returns the winning entry and its phones.

    """
    first_letter = word[0]
    last_letter = word[-1]
    target_length = len(word)

    def sort_key(entry):
        bonus = int(entry.startswith(first_letter)) + int(entry.endswith(last_letter))
        return (editdistance.eval(entry, word) - bonus,
                abs(len(entry) - target_length),
                entry)

    # the sort key ends with the entry itself, so the minimum key's last item
    # is the suggestion
    suggestion = min(map(sort_key, pronouncing.pronunciations))[2]

    return suggestion, pronouncing.phones_for_word(suggestion)
def knownWord(word):
    """Return True when the lower-cased word has a pronunciation and is in ``words_set``."""
    lowered = word.lower()
    has_pronunciation = len(pronouncing.phones_for_word(lowered)) != 0
    return has_pronunciation and lowered in words_set
Example #27
0
def rhyme(word, phones=None):
    """ Returns a list of rhymes for a word.

    Two words rhyme in this 'normal' sense when:
    (1) last stressed vowel and subsequent phonemes match
    Without a phones argument the pronunciation defaults to the first one
    returned for word. An empty list is returned when no rhyme is found.

    This is the 'default' rhyme, the same definition the pronouncing module
    uses for its 'rhymes' function. It is also like the shared set of perfect
    and identical rhymes, except that word itself is removed from the
    returned list.


    :param word: a word
    :param phones: specific CMUdict phonemes string for word (default None)
    :return: a list of rhymes for word
    :raises ValueError: if phones is given but is not a pronunciation of
        word, or if the phonemes string is empty
    """

    if phones is not None:
        if phones not in pronouncing.phones_for_word(word):
            raise ValueError(phones + " not phones for " + word)
    else:
        phones = first_phones_for_word(word)
        if phones == "":
            return []
    if not phones:
        raise ValueError("phonemes string is empty")

    candidates = pronouncing.rhyme_lookup.get(
        pronouncing.rhyming_part(phones), [])
    return [w for w in candidates if w != word]
Example #28
0
def phones_for_word(word):
    """Look up the phones of a word in the CMU dictionary and return them as
    ``(word, phones)``. Words that are missing are handled by splitting on
    hyphens first and by an approximate match otherwise.

    """
    # a blank word maps to a blank phone string
    if not word:
        return word, ['']

    # direct dictionary hit
    direct = pronouncing.phones_for_word(word)
    if direct:
        return word, direct

    # no hyphen to split on: fall back to the closest dictionary entry
    if "-" not in word:
        return phones_for_closest_match(word)

    # hyphenated: resolve every segment independently, then join back up
    segments = []
    for segment in word.split('-'):
        name, segment_phones = phones_for_word(segment)
        segments.append((name, segment_phones[0]))
    combined_phones = ' '.join(p for (w, p) in segments if w)
    combined_word = '-'.join(w for (w, p) in segments)
    return combined_word, [combined_phones]
Example #29
0
def perfect_rhyme(word, phones=None):
    """ Returns a list of perfect rhymes for a word.

    The conditions for a perfect rhyme between words are:
    (1) last stressed vowel and subsequent phonemes match
    (2) onset of last stressed syllable is different
    If phones argument not given, phones/pronunciation used will default to the
    first in the list of phones returned for word. If no rhyme is found, an
    empty list is returned.

    The result is the output of ``rhyme`` minus the output of
    ``identical_rhyme`` for the same word and phones.


    :param word: a word
    :param phones: specific CMUdict phonemes string for word (default None)
    :return: a list of perfect rhymes for word
    :raises ValueError: if phones is given but is not a pronunciation of
        word, or if the phonemes string is empty
    """
    if phones is None:
        phones = first_phones_for_word(word)
        if phones == "":
            return []
    elif phones not in pronouncing.phones_for_word(word):
        # Same wording as rhyme(); the original message had a stray "+".
        raise ValueError(phones + " not phones for " + word)
    if not phones:
        raise ValueError("phonemes string is empty")
    perf_and_iden_rhymes = rhyme(word, phones)
    identical_rhymes = identical_rhyme(word, phones)
    perfect_rhymes = list(np.setdiff1d(perf_and_iden_rhymes, identical_rhymes))
    if word in perfect_rhymes:
        perfect_rhymes.remove(word)
    return perfect_rhymes
    def _new_sentence(self, syls):
        """Make one Markovify sentence and return it only if it has exactly
        ``syls`` syllables; otherwise return None.

        Syllables are counted from the first pronunciation of every word of
        the sentence minus its last character. A sentence containing a word
        without a pronunciation counts as zero syllables. The returned
        sentence has all punctuation characters removed."""

        syls = int(syls)
        config = self.config
        sent = self.text_model.make_short_sentence(
            syls * config.poem_avg_char_per_syl,
            tries=100,
            max_overlap_ratio=config.markovify_max_overlap_ratio,
            max_overlap_total=config.markovify_max_overlap_total)

        if sent is None:
            return None

        # Drop the final character before splitting into words.
        trimmed = sent[0:-1]
        try:
            phones = [pnc.phones_for_word(token)[0] for token in trimmed.split()]
        except IndexError:
            # Word not found in dictionary
            phones = []

        syllable_total = sum(pnc.syllable_count(p) for p in phones)
        if syllable_total == syls and sent:
            return ''.join(c for c in sent if c not in string.punctuation)
        return None
Example #31
0
    def get_phonetic_similarity_rep(self, word):
        """
        Convert a word into a phonetic transcription string that can be
        compared with the transcriptions of other words.

        The first pronunciation of the lower-cased word is used. Stress
        digits are stripped from every phone and each phone is translated
        through ``self.arpabet_map``. Translation stops at the first phone
        missing from the map (a message is printed) and whatever was built up
        to that point is returned.
        """
        first_pronunciation = pronouncing.phones_for_word(word.lower())[0]
        encoded = []
        for raw_phone in first_pronunciation.split(' '):
            bare_phone = ''.join(ch for ch in raw_phone if not ch.isdigit())
            if bare_phone not in self.arpabet_map:
                print("phone ( " + bare_phone +
                      " ) does not exist in arpabet map")
                break
            encoded.append(self.arpabet_map[bare_phone])

        return ''.join(encoded)
Example #32
0
def random_match_phones(word, phones=None):
    """Return words matching a random combination of the word's phonemes.

    This resembles a random general rhyme, except that the whole word is
    matched rather than only the final syllable portion.

    Candidate search patterns come from
    ``wildcard_mix_phones_regex_searches``. Patterns are tried in random
    order until one yields results; patterns without results are discarded.

    :param word: word that should be in the CMU Pronouncing Dictionary
    :param phones: specific phonemes to rhyme with (default None, in which
        case ``first_phones_for_word(word)`` is used)
    :return: de-duplicated list of matching words with ``word`` itself
        removed; an empty list when the looked-up phonemes are ``""`` or
        when no pattern produces a match
    :raises ValueError: if ``phones`` is given but is not one of the
        pronunciations of ``word``, or if the phonemes are otherwise empty
    """
    if phones is not None:
        if phones not in pronouncing.phones_for_word(word):
            raise ValueError("phonemes and word don't match")
    else:
        phones = first_phones_for_word(word)
        if phones == "":
            return []
    if not phones:
        raise ValueError("phonemes string is empty")

    candidates = wildcard_mix_phones_regex_searches(phones)
    while candidates:
        pattern = random.choice(candidates)
        matches = pronouncing.search(pattern)
        if not matches:
            # Dead end: drop this pattern and draw another one.
            candidates.remove(pattern)
            continue
        matches = unique(matches)
        if word in matches:
            matches.remove(word)
        return matches

    print(
        "random general match phones: tried all combos, didn't find anything!")
    return []
Example #33
0
def last_phone(word):
    """Return an integer indicator vector for the last phone of ``word``.

    The last space-separated phone of the word's first pronunciation is
    compared against every entry of ``PHONES``; position ``i`` of the
    result is 1 when ``PHONES[i]`` occurs as a substring of that phone and
    0 otherwise. When the word has no pronunciation an all-zero vector of
    the same length is returned.

    Both outcomes use an integer dtype, so results for known and unknown
    words can be combined without an implicit upcast to float.
    """
    phones = pr.phones_for_word(word)
    if not phones:
        # dtype=int keeps this consistent with the .astype(int) result below.
        return np.zeros(len(PHONES), dtype=int)
    last = phones[0].split(' ')[-1]
    return np.array([x in last for x in PHONES]).astype(int)
def _update_dword_prons(tuples):
    """Write the pronunciations of each word into the ``dword_to_int`` collection.

    ``tuples`` is an iterable of ``(sym, word)`` pairs. For every pair the
    pronunciations of the lowercased word are looked up and stored under
    the ``prons`` field of the document whose ``int`` field equals ``sym``
    in the ``lil-neuron-db`` database.

    A fresh ``MongoClient`` is opened for the call and always closed
    afterwards, even if a lookup or update fails.
    """
    # Imported locally, as in the original; kept so the function carries
    # its own dependencies.
    from pymongo import MongoClient
    import pronouncing
    client = MongoClient()
    try:
        collection = client['lil-neuron-db'].dword_to_int
        for sym, word in tuples:
            prons = pronouncing.phones_for_word(word.lower())
            collection.update_one({'int': sym}, {'$set': {'prons': prons}})
    finally:
        # Release the connection pool instead of leaking it.
        client.close()
Example #35
0
def syllable_counts(sentence):
    """Return the total syllable count of the words in ``sentence``.

    Every entry of ``sentence.words`` is lowercased and looked up with
    ``pr.phones_for_word``. Words with at least one pronunciation add the
    syllable count of their first pronunciation; words without one add
    nothing.
    """
    lookups = (pr.phones_for_word(token.lower()) for token in sentence.words)
    return sum(pr.syllable_count(found[0]) for found in lookups if found)
Example #36
0
def a(word):
    """Return ``word`` with "a " or "an " prepended.

    The choice is based on the first character of the word's first
    pronunciation: "an " when it is one of ``AEIOU``. When that character
    cannot be read (IndexError), the lowercased first letter of the word
    itself is checked against ``aeiou`` instead.
    """
    pronunciations = pronouncing.phones_for_word(word)
    try:
        vowel_start = pronunciations[0][0] in "AEIOU"
    except IndexError:
        # No usable pronunciation: fall back to the spelling.
        vowel_start = word[0].lower() in "aeiou"
    return ("an " if vowel_start else "a ") + word
Example #37
0
    def _rhyming_parts(words):
        """Yield each word together with the rhyming parts of its pronunciations.

        `words` is an iterable of strings.

        This static method is a generator. For every word in `words` it
        yields a pair made of the word and a set holding the result of
        `pronouncing.rhyming_part` for each pronunciation returned by
        `pronouncing.phones_for_word`. A word without pronunciations is
        paired with an empty set.

        """
        for entry in words:
            pronunciations = pronouncing.phones_for_word(entry)
            parts = {pronouncing.rhyming_part(p) for p in pronunciations}
            yield entry, parts
Example #38
0
def get_random_word(part_of_speech, syllables=None, skip_plurals=False):
    """
    Get this kind of word with this many syllables.
    If none found with this many syllables, any'll do.

    Candidate words come from ``get_random_words_from_wordnik``. The first
    word with a pronunciation whose syllable count equals ``syllables`` is
    returned. With ``skip_plurals`` set, words ending in a single "s"
    (any letter case) are treated as possible plurals and passed over.
    When nothing matches, a random candidate is returned; that fallback
    does not apply the plural filter.
    """
    words = get_random_words_from_wordnik(part_of_speech)
    for word in words:
        lowered = word.lower()
        # endswith() is safe for one-letter words, where word[-2] would
        # raise IndexError, and makes the double-s check case-insensitive.
        maybe_plural = lowered.endswith("s") and not lowered.endswith("ss")

        for pronunciation in pronouncing.phones_for_word(word):
            count = pronouncing.syllable_count(pronunciation)

            print(count, word)
            if skip_plurals and maybe_plural:
                # Might be plural, just skip it
                print("Plural? Skip!")
                continue

            if count == syllables:
                # Bingo!
                return word

    # Any'll do
    return random.choice(words)
Example #39
0
wjdata = json.loads(wjson)
wjdata_list = wjdata['results'][0]['captions']

# create empty storage for the selected captions, grouped by syllable count
# (5 syllables, 7 syllables, and 2 or 3 syllables)
syllables5 = []
syllables7 = []
syllables23 = []

# check the captions for fitting syllables (using pronouncingpy + CMU pronouncing dictionary)
# and add them to the matching storage; only the first 82 captions are examined
for text in wjdata_list[:82]:
    try:
        # first listed pronunciation of every word in the caption
        phones = [pronouncing.phones_for_word(p)[0] for p in text.split()]
    except IndexError:
        # skip captions containing a non-indexed word such as <UNK>
        continue

    count = sum(pronouncing.syllable_count(p) for p in phones)
    if count == 5:
        syllables5.append(text)
    elif count == 7:
        syllables7.append(text)
    elif count in (2, 3):
        syllables23.append(text)
Example #40
0
 def test_phones_for_word(self):
     # "conflicts" is expected to have four pronunciations; the first one
     # is compared verbatim.
     pronunciations = pronouncing.phones_for_word("conflicts")
     self.assertEqual(len(pronunciations), 4)
     first_pronunciation = pronunciations[0]
     self.assertEqual(first_pronunciation, "K AH0 N F L IH1 K T S")
Example #41
0
import enchant

import pronouncing

# Spell-check suggestions for two contractions from the en_US dictionary.
dictionary = enchant.request_dict("en_US")
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(dictionary.suggest("untrimm'd"))
print(dictionary.suggest("don't"))

# Look up the apostrophe-free spelling and, when a pronunciation is found,
# show the stress pattern of the first one.
phones = pronouncing.phones_for_word("dont")
print(phones)
if phones:
    first_phone = phones[0]
    stresses = pronouncing.stresses(first_phone)
    print(stresses)
Example #42
0
def translate(text):
    """Return ``split_phone`` applied to the pronunciations of ``text``.

    Raises ValueError('phone not found') when ``phones_for_word`` returns
    nothing for ``text``.
    """
    pronunciations = phones_for_word(text)
    if pronunciations:
        return split_phone(pronunciations)
    raise ValueError('phone not found')
Example #43
0
    encoded = word.encode('utf8')
    for dm in double_metaphone.dm(encoded):
        if dm:
            metaphone_to_word[dm].add(word)
            
# Rank every entry of pronouncing.pronunciations by edit distance to WORD.
by_distance = []
for word in pronouncing.pronunciations:
    distance = editdistance.eval(word, WORD)
    # Favour candidates that share WORD's first or last character.
    if word.startswith(WORD[0]):
        distance -= 1
    if word.endswith(WORD[-1]):
        distance -= 1
    character_difference = abs(len(word) - len(WORD))
    by_distance.append((distance, character_difference, word))

# Tuples sort by distance first, then length difference, then the word.
by_distance.sort()
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(by_distance[:100])
print(min(by_distance))

print(pronouncing.phones_for_word('luteous'))
print(pronouncing.phones_for_word('gluteus'))

possible = set()
for dm in double_metaphone.dm('luteous'):
    if dm:
        # Look up each non-empty metaphone code itself rather than a
        # hard-coded 'LTS' key that ignored the loop variable.
        possible.update(metaphone_to_word[dm])

print(possible)


 def test_a(self):
     # The word 'a' is expected to have exactly these two pronunciations,
     # in this order.
     expected = ['AH0', 'EY1']
     self.assertEqual(pronouncing.phones_for_word('a'), expected)
 def test_phones_for_word_uppercase(self):
     # An all-uppercase query is expected to give the same four
     # pronunciations as the lowercase spelling.
     pronunciations = pronouncing.phones_for_word("CONFLICTS")
     self.assertEqual(len(pronunciations), 4)
     first_pronunciation = pronunciations[0]
     self.assertEqual(first_pronunciation, "K AH0 N F L IH1 K T S")
Example #46
0
def my(word):
    """Return the syllable count of ``word``.

    Uses the first pronunciation from ``pronouncing.phones_for_word`` when
    one exists; otherwise falls back to ``syllables_en.count(word)``.
    """
    pronunciations = pronouncing.phones_for_word(word)
    if not pronunciations:
        return syllables_en.count(word)
    return pronouncing.syllable_count(pronunciations[0])
 def test_a42128(self):
     # This is not in the newer cmudict set, so the lookup is expected to
     # come back empty.
     self.assertEqual(pronouncing.phones_for_word('a42128'), [])