def mutate_limerick(limerick):
    """Replace one random word of one random limerick line, in place.

    A line number between 1 and 5 is drawn, one whitespace-separated word of
    that line is swapped for a random vocabulary word that has at least one
    pronunciation, and the result is stored through the matching
    ``set_line_N`` method.

    The earlier version indexed the line string directly, which selected a
    single *character* and then replaced every occurrence of it in the line.

    Parameters
    ----------
    limerick : object exposing ``line_1`` .. ``line_5`` (strings) and
        ``set_line_1`` .. ``set_line_5``.

    Returns
    -------
    None. An empty line is left untouched.
    """
    line_number = randint(1, 5)
    line = getattr(limerick, "line_%d" % line_number)

    tokens = line.split()
    if not tokens:
        # Nothing to mutate.
        return
    position = randint(1, len(tokens)) - 1

    # Fetch the vocabulary once instead of on every retry.
    vocabulary = words.words()
    new_word = sample(vocabulary, 1)[0]
    # Keep drawing until the word has a known pronunciation.
    while len(pronouncing.phones_for_word(new_word)) < 1:
        new_word = sample(vocabulary, 1)[0]

    tokens[position] = new_word
    getattr(limerick, "set_line_%d" % line_number)(" ".join(tokens))
def generate_line(syllable_number, rhyme_word, word_list):
    """Build a line ending in ``rhyme_word`` with ``syllable_number`` syllables.

    Random words from ``word_list`` are prepended until the syllable total
    matches. When the total overshoots, the line is reset to the bare rhyme
    word and the search starts over.

    Parameters
    ----------
    syllable_number : int
        Target syllable count for the whole line.
    rhyme_word : str
        Word the line must end with; it needs at least one pronunciation.
    word_list : sequence of str
        Candidate words; trailing whitespace is stripped from each pick.

    Returns
    -------
    str
        The generated line. After more than 100000 iterations the current
        (over-long) line is returned instead of looping forever.
    """
    rhyme_phones = random.choice(pronouncing.phones_for_word(rhyme_word))
    rhyme_syllables = pronouncing.syllable_count(rhyme_phones)
    if rhyme_syllables > syllable_number:
        # The rhyme word alone is already too long; resetting can never help,
        # so return what the timeout path would eventually have returned.
        return rhyme_word

    line = rhyme_word
    line_syllables = rhyme_syllables
    attempts = 0
    while line_syllables != syllable_number:
        if line_syllables > syllable_number:
            if attempts > 100000:
                return line
            # Overshot: start again from the rhyme word.
            line = rhyme_word
            line_syllables = rhyme_syllables
        else:
            word = random.choice(word_list).rstrip()
            word_phones = pronouncing.phones_for_word(word)
            if len(word_phones) != 0:
                # Count the syllables of the word being added, not of the
                # rhyme word (the previous code reused the rhyme phones here).
                line_syllables += pronouncing.syllable_count(word_phones[0])
                line = word + " " + line
        attempts += 1
    return line
def test_phones_for_word(self):
    """Check lookups for a known word and for a string with no entry."""
    known = pronouncing.phones_for_word("conflicts")
    self.assertEqual(len(known), 4)
    self.assertEqual(known[0], "K AH0 N F L IH1 K T S")

    # not in the dictionary (presumably)
    unknown = pronouncing.phones_for_word("asdfasdfasdf")
    self.assertEqual(unknown, [])
def findLineStress(tokenized_line):
    '''
    find accentual stress of a given tokenized line, based on CMU dict.
    Uses relative stress per word, so somewhat limited.

    Parameters
    ----------
    tokenized_line : list
        list of tokens from line, usually preprocessed to remove non-words

    Returns
    -------
    parselist: list of potential stresses after parsing.
        0 is unstressed, 1 is primary stress, 2 is secondary stress (middle)
        Words without a pronunciation contribute nothing to the parses.
    '''
    parses = {''}
    for word in tokenized_line:
        pronunciations = pronouncing.phones_for_word(word)
        if not pronunciations:
            continue
        # Only distinct stress patterns matter. Collapsing duplicates at every
        # step keeps the candidate set from growing with repeated entries.
        word_stresses = {str(pronouncing.stresses(p)) for p in pronunciations}
        parses = {prefix + stress
                  for prefix in parses
                  for stress in word_stresses}
    return list(parses)
def split_spellings(sentence, full_pronounciation_output=False):
    """Yield pronunciations for each word of ``sentence``.

    The sentence is whitespace-tokenized and every token is passed through
    ``string_cleaner``; empty results are skipped. Purely numeric tokens are
    spelled out with ``inflect_engine.number_to_words`` and each resulting
    word is looked up separately. Single-word numbers are handled like
    multi-word ones; the earlier code only split the spelled-out number when
    it contained a space.

    Parameters
    ----------
    sentence : str
    full_pronounciation_output : bool
        When false (default) only the first pronunciation of each word is
        yielded; when true the whole list of pronunciations is yielded.

    Raises
    ------
    IndexError
        In first-pronunciation mode, when a word has no pronunciation.
    """
    def pronounce(token):
        phones = pronouncing.phones_for_word(token)
        if full_pronounciation_output:
            return phones
        return phones[0]

    word_array = tokenize.WhitespaceTokenizer().tokenize(sentence)
    print(word_array)
    for word in word_array:
        word = string_cleaner(word)
        if word == "":
            continue
        if word.isdigit():
            numword = string_cleaner(inflect_engine.number_to_words(word))
            print(numword)
            # split() with no argument also copes with a one-word number.
            for element in numword.split():
                yield pronounce(element)
        else:
            yield pronounce(word)
def fallback_get_phonemes(name, lang="en"):
    """Return a phoneme string for ``name``, or None when none can be found.

    Multi-word names are resolved word by word and joined with " . ".
    Single words use the first dictionary pronunciation when there is one,
    otherwise the space-joined result of ``fallback_guess_phonemes``.

    Parameters
    ----------
    name : str
        Word or space-separated words; lower-cased before lookup.
    lang : str
        Language code; only codes starting with "en" are supported.

    Returns
    -------
    str or None
        None when any word of the name cannot be resolved.

    Raises
    ------
    ValueError
        If ``lang`` is not an English code. The earlier code returned the
        exception object instead of raising it.
    """
    if not lang.startswith("en"):
        raise ValueError("Unsupported language")

    name = name.lower()
    if " " in name:
        pieces = []
        for part in name.split(" "):
            part_phonemes = fallback_get_phonemes(part, lang)
            if part_phonemes is None:
                return None
            pieces.append(part_phonemes)
        return " . ".join(pieces)

    known = pronouncing.phones_for_word(name)
    if len(known):
        return known[0]

    guess = fallback_guess_phonemes(name)
    if guess is not None:
        return " ".join(guess)
    return None
def findLineStress(line):
    '''find accentual stress of a given line, based on CMU dict.
    Still a bit unclever.

    _parameters_
    line: line of text

    _returns_
    parselist: list of potential stresses after parsing.
        0 is unstressed, 1 is primary stress, 2 is secondary stress (middle)
    syllableLengths: list of syllable lengths corresponding to the parses in parselist
    wordCount: count of words in the line
    '''
    line = prepString(removeMarkupWords(line))
    words = line.split()
    wordCount = len(words)

    parses = {''}
    for word in words:
        pronunciations = pronouncing.phones_for_word(word)
        if not pronunciations:
            # Words with no pronunciation contribute nothing to the parses.
            continue
        # Only distinct stress patterns matter. Collapsing duplicates at every
        # step keeps the candidate set from growing with repeated entries.
        word_stresses = {str(pronouncing.stresses(p)) for p in pronunciations}
        parses = {prefix + stress
                  for prefix in parses
                  for stress in word_stresses}

    # Materialise once so the lengths line up with the parses by position.
    parselist = list(parses)
    return parselist, [len(parse) for parse in parselist], wordCount
def syllable_count(sentence):
    """Return the total syllable count of the words in ``sentence``.

    Each whitespace-separated word is counted using its first pronunciation.
    A word with no pronunciation is counted via an empty phone string.
    """
    total = 0
    for token in sentence.split():
        known = pronouncing.phones_for_word(token.strip())
        first = "" if known == [] else known[0]
        total += pronouncing.syllable_count(first)
    return total
def leven_distance(old_word, new_word):
    """Score how similar two words are, by spelling and by pronunciation.

    Three Levenshtein-based ratios are computed, each as
    ``(shorter_length - distance) / shorter_length``:

    1. on the raw spellings,
    2. on the first pronunciation of each word, spaces included,
    3. on the same pronunciations with spaces removed.

    Ratios 2 and 3 are only used when both words have a pronunciation.

    Returns
    -------
    float
        The largest ratio when it exceeds 0.3, otherwise 0.0001.
    """
    def ratio(first, second):
        # The spelling ratio used to take min() of the old length with itself,
        # so the new word's length was never considered.
        shortest = min(len(first), len(second))
        if shortest == 0:
            # Avoid dividing by zero on empty input.
            return 0.0
        distance = Levenshtein.distance(first, second)
        # float() keeps this a true division under Python 2 as well.
        return (shortest - distance) / float(shortest)

    # 1st technique: spelling
    ratio_ch = ratio(old_word, new_word)

    # 2nd and 3rd techniques need a pronunciation for both words
    old_pronunciations = pp.phones_for_word(old_word)
    new_pronunciations = pp.phones_for_word(new_word)
    if len(old_pronunciations) > 0 and len(new_pronunciations) > 0:
        old_word_phs = old_pronunciations[0]
        new_word_phs = new_pronunciations[0]
        # 2nd technique: pronunciation including spaces
        ratio_phs = ratio(old_word_phs, new_word_phs)
        # 3rd technique: pronunciation without spaces
        ratio_ph = ratio(old_word_phs.replace(" ", ""),
                         new_word_phs.replace(" ", ""))
    else:
        # Large negative values so these can never win the max() below.
        ratio_ph = -1000
        ratio_phs = -1000

    best = max(ratio_ch, ratio_ph, ratio_phs)
    # Anything at or below the threshold collapses to a tiny constant.
    if best > 0.3:
        return best
    return 0.0001
def syllable_counter(lines):
    '''
    Count all syllables in a list of strings.

    NOTE: This does not factor in multi-syllabic digits, times (i.e. 1:03),
    and most likely other non-"word" words.

    Input
    -----
    lines : list (str)
        List of strings to count.

    Output
    ------
    total : int
        Total number of syllables in the input list.

    [Modified from Allison Parrish's example in the documention for her
     library, pronouncing]:
    https://pronouncing.readthedocs.io/en/latest/tutorial.html
    '''
    total = 0
    for line in lines:
        for word in line.split():
            known = pronouncing.phones_for_word(word)
            # A word with no pronunciation still counts as one syllable,
            # using the phoneme for 'I' as a stand-in.
            phoneme = known[0] if known else 'AY1'
            total += pronouncing.syllable_count(phoneme)
    return total
def pronunciationSimilarity(str1, str2):
    '''
    Finds the relative edit distance of the phones of two strings.

    The first pronunciation of each lower-cased string is split into phones
    and the two phone lists are compared with editSimilarity.

    Returns 1 when the comparison cannot be made, e.g. when either string
    has no pronunciation.
    '''
    try:
        pronunciation1 = pronouncing.phones_for_word(str1.lower())
        pronunciation2 = pronouncing.phones_for_word(str2.lower())
        phones1 = pronunciation1[0].split()
        phones2 = pronunciation2[0].split()
        return editSimilarity(phones1, phones2)
    except Exception:
        # Deliberately best-effort. Unlike a bare except, this lets
        # KeyboardInterrupt and SystemExit propagate.
        return 1
def rhymes_all_words(word, mc):
    """
    Look for all words that rhyme with word in our markov chain

    :param word: string; for a hyphenated word only the part after the last
        hyphen is used
    :param mc: dict whose keys are the candidate words
    :return: rhyme_list: list of keys of mc that rhyme with word.
        NOTE: when word has no pronunciation the string 'a' is returned
        instead of a list (kept as-is for existing callers).

    Examples
    --------
    >>>rhymes_all_words('people', markov_chain)
    ['adorable', 'example', 'little', 'professional', 'people', 'sentimental', 'mutual'... ]

    >>>rhymes_all_words('5-foot', markov_chain)
    ['put', 'underfoot', 'foot']
    """
    if "-" in word:
        word = word.split("-")[-1]

    # Candidate words are taken from the chain's keys, whatever word's own
    # entry in the chain looks like.
    regex = re.compile(r"^([A-Z])\w+([a-zA-Z]+[-'][a-zA-Z]+)|([a-zA-Z]+\.)|([a-zA-Z])+$")
    candidates = [w for w in mc.keys() if regex.match(w)]

    pronunciations = pronouncing.phones_for_word(word)
    if not pronunciations:
        print('no pron ' + word)
        return 'a'
    word_pron = pronunciations[0].split()

    # Position of the LAST phone that carries a stress digit. list.index()
    # returned the first equal phone, which is wrong when the same vowel
    # phone occurs more than once in the word.
    index = -1
    for position in range(len(word_pron) - 1, -1, -1):
        if not word_pron[position].isalpha():
            index = position
            break

    # Number of trailing phones that must match for a rhyme.
    tail_length = len(word_pron) - index

    # Tails of every pronunciation of the word.
    word_prons = [wp.split()[-tail_length:] for wp in pronunciations]

    rhyme_list = []
    for w in candidates:
        for w_pron in pronouncing.phones_for_word(w):
            w_pron = w_pron.split()
            # The candidate must be strictly longer than the tail itself.
            if len(w_pron) > tail_length and w_pron[-tail_length:] in word_prons:
                rhyme_list.append(w)
                break
    return rhyme_list
def countable_corpus(corpus):
    '''
    Function to convert a corpus to a Markov dictionary where all of the
    words are "countable", i.e. `phones_for_word` returns at least one
    pronunciation for them, so their syllables and phonemes can be counted.
    This is necessary for generators that have syllabic constraints.

    Input
    -----
    corpus : sequence
        Each element is paired with the one that follows it.
        NOTE(review): earlier documentation described this as one long
        string, which would pair single characters; presumably a list of
        words is intended -- confirm against the caller.

    Output
    ------
    text_dictionary : dict [str, list (str)]
        Markov dictionary with each word that appears in the corpus as keys,
        with each value being a list of (countable) words that follow that
        key. When the follower is not countable, a random countable word
        from the corpus is stored instead (this introduces randomness). If
        the corpus holds no countable word at all, nothing is stored for
        that pair; the earlier code looped forever in that situation.
    '''
    text_dictionary = defaultdict(list)

    # Remember lookups so every distinct word hits the dictionary once.
    known = {}

    def countable(word):
        if word not in known:
            known[word] = bool(phones_for_word(word))
        return known[word]

    # Built lazily, only if some follower turns out not to be countable.
    replacements = None

    for current_word, next_word in zip(corpus, corpus[1:]):
        if countable(next_word):
            text_dictionary[current_word].append(next_word)
            continue
        if replacements is None:
            replacements = [w for w in corpus if w and countable(w)]
        if replacements:
            text_dictionary[current_word].append(random.choice(replacements))

    return text_dictionary
def get_rhyming_groups(group_size, number_groups, pool):
    """Returns a list of rhyming groups of the given size from the given
    candidate pool.

    Sentences are popped from the pool (the pool is consumed) and clustered
    by the rhyming part of their last word until enough clusters hold at
    least ``group_size`` sentences. Within a cluster, no two sentences end
    with the same word. Sentences whose last word has no pronunciation are
    skipped.

    Args:
        group_size (int): number of lines in the rhyming group.
        number_groups (int): number of rhyming groups.
        pool (list) : candidate pool from which to draw lines.

    Raises:
        InsufficientSentencesError: if the candidate pool is not rich enough.
    """
    clusters = defaultdict(list)

    def full_clusters():
        return [c for c in clusters.values() if len(c) >= group_size]

    while len(full_clusters()) < number_groups:
        try:
            sentence = pool.pop()
        except (KeyError, IndexError):
            # set.pop() raises KeyError, list.pop() raises IndexError.
            raise InsufficientSentencesError(
                'Candidate pool is not rich enough!')

        last_word = sentence.split(" ")[-1]
        pronunciations = pronouncing.phones_for_word(last_word)
        if not pronunciations:
            # No pronunciation means no rhyming part; drop the sentence.
            continue
        rhyming_part = pronouncing.rhyming_part(pronunciations[0])

        cluster = clusters[rhyming_part]
        if last_word not in [s.split(" ")[-1] for s in cluster]:
            cluster.append(sentence)

    groups = full_clusters()
    random.shuffle(groups)
    return [random.sample(group, group_size) for group in groups]
def last_vowel_phones(line, num_phones):
    """Return the last ``num_phones`` vowel phones of the line's ending word.

    Only vowel phones with stress marker 0 or 1 are considered, taken from
    the first pronunciation of ``ending_word(line)``.

    Returns
    -------
    list of str
        The vowel phones, last one first.
    0
        When the word has no pronunciation or fewer than ``num_phones``
        matching vowels. The earlier code raised IndexError when the word
        had no matching vowel at all.
    """
    last_word = ending_word(line)
    vowel_bases = ('AA', 'AE', 'AH', 'AO', 'AW', 'AY', 'EH', 'ER', 'EY',
                   'IH', 'IY', 'OW', 'OY', 'UH', 'UW')
    vowels = {base + stress for base in vowel_bases for stress in '10'}

    pronunciations = pronouncing.phones_for_word(last_word)
    if not pronunciations:
        return 0

    vowel_phones = [p for p in pronunciations[0].split() if p in vowels]
    if len(vowel_phones) < num_phones:
        return 0
    # Walk backwards from the end of the word.
    return [vowel_phones[-i] for i in range(1, num_phones + 1)]
def line_rhymescheme(self, line):
    """
    Creates rhyme scheme for a given line.

    The last word of the line is stripped of non-word characters and
    lower-cased. For each of its pronunciations, the last one or two sounds
    are expanded through self.sound_dict into candidate sound pairs. Each
    pair is scored by how many dictionary entries end with it, and the
    best-scoring pair wins. Without a pronunciation, the last two letters of
    the word are used.

    :param line: line of lyrics, str
    :return: rhyme scheme for the line, str
    """
    end_word = re.sub(r"\W+", '', get_last_word(line)).lower()
    pronunciation_list = pronouncing.phones_for_word(end_word)
    if not pronunciation_list:
        return end_word[-2:]

    # Expand the tail of every pronunciation into candidate pairs.
    sound_pairs = []
    for pronunciation in pronunciation_list:
        tail = pronunciation.split(' ')[-2:]
        if len(tail) < 2:
            sound_pairs.extend(
                (first, '') for first in self.sound_dict[tail[0]])
        else:
            sound_pairs.extend(
                (first, second)
                for first in self.sound_dict[tail[0]]
                for second in self.sound_dict[tail[1]])

    # Score every candidate; repeated candidates accumulate.
    potential_rhymes = {}
    for candidate in sound_pairs:
        first, second = candidate
        if second == '':
            pattern = first + "$"
        else:
            pattern = first + " " + second + "$"
        potential_rhymes[candidate] = (
            potential_rhymes.get(candidate, 0)
            + len(pronouncing.search(pattern)))

    best_pair, _ = max(potential_rhymes.items(), key=operator.itemgetter(1))
    return best_pair[0] + ' ' + best_pair[1]
def test():
    """Interactive loop that prints word statistics until '0' is entered.

    For each non-empty word it prints the word's entry in cfd, five random
    following words, its pronunciations, syllable count, rhymes and stresses.
    """
    while True:
        word = input("Please enter a word (Enter '0' to quit): ")
        if word == '0':
            break
        if word == "":
            continue

        print(cfd[word].keys(), cfd[word].values())
        print()

        print("Random 5 words following", word)
        print(random_word_generator(word, 5))
        print()

        print("Pronunciations of", word)
        print(pronouncing.phones_for_word(word))
        print()

        print("Syllables in", word)
        print(count_syllables(word))
        print()

        print("Rhymes for", word)
        print(get_rhymes(word))
        print()

        print("Stresses for", word)
        print(get_stresses(word))
        print()
def is_iambic(phrase):
    """
    check that we satisfy iambic meter.
    return 1 if so, otherwise 0.
    definitely an imperfect check...
    if we end up needing to check a word that's not in the CMU dictionary, just return 0.

    Each word is stripped of surrounding punctuation and lower-cased, and the
    stresses of its first pronunciation are appended to the meter. Positions
    are counted from 0: an even position must not carry stress 1 and an odd
    position must not carry stress 0.
    """
    meter = ''
    for word in phrase.split():
        word = word.strip().strip(string.punctuation).lower()
        phones_list = pronouncing.phones_for_word(word)
        if not phones_list:
            return 0  # word not found
        # just default to the first pronunciation if > 1 given
        stresses = pronouncing.stresses(phones_list[0])
        if stresses == '1':
            # allow ambiguity for 1-syllable words with stress 1
            stresses = '2'
        meter += stresses

    for position, stress in enumerate(meter):
        if position % 2 == 0 and stress == '1':
            return 0
        if position % 2 == 1 and stress == '0':
            return 0
    return 1
def _new_sentence2(self, syls):
    """Generate sentences until one has exactly ``syls`` syllables.

    Each candidate comes from self.text_model.make_short_sentence. Its last
    character is dropped before the words are looked up, and a candidate
    containing a word with no pronunciation counts as zero syllables. The
    accepted sentence is returned with all punctuation removed. Progress is
    printed on every iteration.
    """
    syls = int(syls)
    sent = None
    phones = []

    def syllable_total():
        return sum([pnc.syllable_count(p) for p in phones])

    while sent is None or syllable_total() != syls:
        print(sent)
        print(syllable_total() - syls)
        sent = self.text_model.make_short_sentence(
            syls * self.config.poem_avg_char_per_syl,
            tries=100,
            max_overlap_ratio=self.config.markovify_max_overlap_ratio,
            max_overlap_total=self.config.markovify_max_overlap_total)
        if sent is None:
            continue
        tokens = sent[0:-1].split()
        try:
            phones = [pnc.phones_for_word(token)[0] for token in tokens]
        except IndexError:
            # Word not found in dictionary
            phones = []

    return ''.join(c for c in sent if c not in string.punctuation)
def phones_for_closest_match(word):
    """Brute force: find the dictionary word closest to ``word``.

    Every entry of pronouncing.pronunciations is ranked by edit distance
    (reduced by one for a matching first letter and by one for a matching
    last letter), then by difference in length, then alphabetically.

    Returns the best suggestion and the phones for that suggestion.
    """
    def rank(possibility):
        # levenstein, with a bonus for same first letter / last letter
        score = editdistance.eval(possibility, word)
        if possibility.startswith(word[0]):
            score -= 1
        if possibility.endswith(word[-1]):
            score -= 1
        # ties are broken by difference in length, then alphabetically
        return (score, abs(len(possibility) - len(word)), possibility)

    ranked = [rank(possibility) for possibility in pronouncing.pronunciations]
    suggestion = min(ranked)[2]
    return suggestion, pronouncing.phones_for_word(suggestion)
def phones_for_word(word):
    """Return ``(word, phones)`` for a word, with fallbacks for unknown words.

    A blank word yields a single blank phone string. A word found in the CMU
    dictionary is returned with its phones. A hyphenated word is resolved
    piece by piece and joined back up. Anything else falls back to the
    closest approximate match.
    """
    if not word:
        return word, ['']

    found = pronouncing.phones_for_word(word)
    if found:
        return word, found

    if "-" not in word:
        return phones_for_closest_match(word)

    # Resolve each piece independently, keeping its first pronunciation.
    resolved = []
    for piece in word.split('-'):
        suggestion, piece_phones = phones_for_word(piece)
        resolved.append((suggestion, piece_phones[0]))

    joined_phones = ' '.join(p for (w, p) in resolved if w)
    joined_word = '-'.join(w for (w, p) in resolved)
    return joined_word, [joined_phones]
def search_match_by_regex():
    """Demonstrate pronouncing.search with two patterns.

    Prints the first pronunciation of "jest", the first five search results
    for it, and every result for the pattern "IH1 D AH0 L$".

    print is called with a single parenthesised argument so the function
    runs unchanged on Python 2 and Python 3.
    """
    phones_jest = pronouncing.phones_for_word("jest")[0]
    print(phones_jest)
    print(pronouncing.search(phones_jest)[:5])

    # finds all of the words that end in -iddle
    phone_iddle = pronouncing.search("IH1 D AH0 L$")
    print(phone_iddle)
def pick_rhyme_for_word(self, word, forbidden_pronunciations=None, line=True):
    """Pick something that rhymes with ``word`` from self.by_rhyming_part.

    A pronunciation of the word is chosen at random and its rhyming part is
    looked up in self.by_rhyming_part. If that gives no usable rhyme word,
    another pronunciation is tried.

    :param word: word to rhyme with
    :param forbidden_pronunciations: pronunciations of word to skip
    :param line: when true return a whole line, otherwise just a word
    :return: a rhyming line or word; ``word`` itself when no pronunciation
        of it gives a rhyme
    """
    forbidden = set(forbidden_pronunciations or ())

    # Work on a filtered copy. The earlier code removed entries from the list
    # returned by pronouncing.phones_for_word; if the library hands back its
    # own cached list, that corrupts later lookups -- presumably the cause of
    # the "how can this happen?" case the earlier code printed about.
    candidates = [p for p in pronouncing.phones_for_word(word)
                  if p not in forbidden]

    while candidates:
        # choose a pronunciation at random and extract the rhyme phonemes
        chosen_pronunciation = random.choice(candidates)
        rhyming_part = pronouncing.rhyming_part(chosen_pronunciation)

        # consider the other words which rhyme with these phonemes
        various_rhymes = self.by_rhyming_part[rhyming_part]
        rhyme_words = list(various_rhymes.keys())
        # sometimes the word doesn't show up, if it's the only line with
        # that ending
        if len(rhyme_words) > 1 and word in rhyme_words:
            rhyme_words.remove(word)  # don't rhyme it with itself

        if len(rhyme_words) == 0:
            # sadness. try another pronunciation
            candidates.remove(chosen_pronunciation)
            continue

        rhyme_word = random.choice(rhyme_words)
        if line:
            # return a whole line
            return random.choice(various_rhymes[rhyme_word])
        # just return a word
        return rhyme_word

    # if we don't have any pronunciations... just return the word
    return word
def knownWord(word):
    """Return True when the lower-cased word has a pronunciation and is in words_set."""
    word = word.lower()
    has_pronunciation = len(pronouncing.phones_for_word(word)) != 0
    return has_pronunciation and word in words_set
def rhyme(word, phones=None):
    """
    Returns a list of rhymes for a word.

    Two words rhyme here when their rhyming parts, as computed by
    pronouncing.rhyming_part, are equal. The word itself is never included
    in the result.

    If phones is not given, the value of first_phones_for_word(word) is
    used; when that is an empty string an empty list is returned.

    :param word: a word
    :param phones: specific CMUdict phonemes string for word (default None)
    :return: list of rhymes for word
    :raises ValueError: if phones is given but is not one of the word's
        pronunciations, or if the phonemes string is empty
    """
    if phones is None:
        phones = first_phones_for_word(word)
        if phones == "":
            return []
    elif phones not in pronouncing.phones_for_word(word):
        raise ValueError(phones + " not phones for " + word)

    if not phones:
        raise ValueError("phonemes string is empty")

    candidates = pronouncing.rhyme_lookup.get(
        pronouncing.rhyming_part(phones), [])
    return [candidate for candidate in candidates if candidate != word]
def perfect_rhyme(word, phones=None):
    """
    Returns a list of perfect rhymes for a word.

    The result is everything returned by rhyme(word, phones) that is not
    also returned by identical_rhyme(word, phones), with the word itself
    removed.

    If phones is not given, the value of first_phones_for_word(word) is
    used; when that is an empty string an empty list is returned.

    :param word: a word
    :param phones: specific CMUdict phonemes string for word (default None)
    :return: a list of perfect rhymes for word
    :raises ValueError: if phones is given but is not one of the word's
        pronunciations, or if the phonemes string is empty
    """
    if phones is None:
        phones = first_phones_for_word(word)
        if phones == "":
            return []
    elif phones not in pronouncing.phones_for_word(word):
        # Message now matches the one raised by rhyme(); it used to contain
        # a stray "+" before the word.
        raise ValueError(phones + " not phones for " + word)

    if not phones:
        raise ValueError("phonemes string is empty")

    perf_and_iden_rhymes = rhyme(word, phones)
    identical_rhymes = identical_rhyme(word, phones)
    perfect_rhymes = list(np.setdiff1d(perf_and_iden_rhymes, identical_rhymes))
    if word in perfect_rhymes:
        perfect_rhymes.remove(word)
    return perfect_rhymes
def _new_sentence(self, syls):
    """Create one sentence with Markovify and validate its syllable count.

    Returns the sentence with punctuation removed when it has exactly
    ``syls`` syllables. Returns None when no sentence could be generated,
    the count does not match, or a word has no pronunciation.
    """
    syls = int(syls)
    sent = self.text_model.make_short_sentence(
        syls * self.config.poem_avg_char_per_syl,
        tries=100,
        max_overlap_ratio=self.config.markovify_max_overlap_ratio,
        max_overlap_total=self.config.markovify_max_overlap_total)
    if sent is None:
        return None

    # Drop the final character before splitting into words
    # (might be double work checking for punctuation).
    phones = []
    try:
        for token in sent[0:-1].split():
            phones.append(pnc.phones_for_word(token)[0])
    except IndexError:
        # Word not found in dictionary
        phones = []

    syllable_total = sum(pnc.syllable_count(p) for p in phones)
    if syllable_total == syls and sent:
        return ''.join(c for c in sent if c not in string.punctuation)
    return None
def get_phonetic_similarity_rep(self, word):
    """
    convert a given word into a phonetic transcription string, which allows
    for measuring phonetic similarity with other words

    The first pronunciation of the lower-cased word is used. Stress digits
    are stripped from each phone and the phone is translated through
    self.arpabet_map. Translation stops at the first phone missing from the
    map, after printing a message.

    output: a phonetic string for the input word, built from the
    self.arpabet_map entries of its phones
    """
    first_pronunciation = pronouncing.phones_for_word(word.lower())[0]
    output = ''
    for raw_phone in first_pronunciation.split(' '):
        phoneme = ''.join(ch for ch in raw_phone if not ch.isdigit())
        if phoneme not in self.arpabet_map:
            print("phone ( " + phoneme + " ) does not exist in arpabet map")
            break
        output += self.arpabet_map[phoneme]
    return output
def random_match_phones(word, phones=None):
    """Returns words that match a random combination of phonemes

    Search patterns come from wildcard_mix_phones_regex_searches(phones).
    They are tried in random order until one gives results; those results
    are de-duplicated with unique() and returned without the word itself.

    :param word: word that should be in the CMU Pronouncing Dictionary
    :param phones: specific phonemes to match (default None, meaning the
        value of first_phones_for_word(word))
    :return: list of matching words; empty when nothing matched or the word
        has no phones
    :raises ValueError: if phones is given but is not one of the word's
        pronunciations, or if the phonemes string is empty
    """
    if phones is None:
        phones = first_phones_for_word(word)
        if phones == "":
            return []
    elif phones not in pronouncing.phones_for_word(word):
        raise ValueError("phonemes and word don't match")

    if not phones:
        raise ValueError("phonemes string is empty")

    remaining = wildcard_mix_phones_regex_searches(phones)
    while remaining:
        pattern = random.choice(remaining)
        matches = pronouncing.search(pattern)
        if not matches:
            # This pattern gave nothing; don't try it again.
            remaining.remove(pattern)
            continue
        matches = unique(matches)
        if word in matches:
            matches.remove(word)
        return matches

    print(
        "random general match phones: tried all combos, didn't find anything!")
    return []
def last_phone(word):
    """Encode the last phone of ``word`` as a 0/1 vector over PHONES.

    The last phone of the word's first pronunciation is compared against
    every entry of PHONES; an entry is marked 1 when it occurs as a
    substring of that phone. A word with no pronunciation gives all zeros.

    Both branches return an integer array; the zero vector used to be a
    float array while the encoded vector was an integer array.
    """
    pronunciations = pr.phones_for_word(word)
    if not pronunciations:
        return np.zeros(len(PHONES), dtype=int)
    last = pronunciations[0].split(' ')[-1]
    return np.array([(x in last) for x in PHONES]).astype(int)
def _update_dword_prons(tuples):
    """Store the pronunciations of each word in the dword_to_int collection.

    For every (sym, word) pair, the document whose 'int' field equals sym in
    the 'lil-neuron-db' database gets its 'prons' field set to the
    pronunciations of the lower-cased word.

    :param tuples: iterable of (sym, word) pairs
    """
    from pymongo import MongoClient
    import pronouncing

    client = MongoClient()
    try:
        collection = client['lil-neuron-db'].dword_to_int
        for sym, word in tuples:
            prons = pronouncing.phones_for_word(word.lower())
            collection.update_one({'int': sym}, {'$set': {'prons': prons}})
    finally:
        # Release the connection even when an update fails; the client was
        # never closed before.
        client.close()
def syllable_counts(sentence):
    """Return the total syllable count of ``sentence.words``.

    Each word is lower-cased and counted using its first pronunciation.
    Words with no pronunciation are ignored.
    """
    all_pronunciations = (
        pr.phones_for_word(token.lower()) for token in sentence.words)
    return sum(
        pr.syllable_count(pronunciations[0])
        for pronunciations in all_pronunciations
        if len(pronunciations) > 0)
def a(word):
    """
    Return the word with the correct article prepended

    "an" is used when the first pronunciation of the word starts with one of
    the letters A, E, I, O, U. When there is no pronunciation, the first
    letter of the word is tested instead. An empty word gets "a "; it used
    to raise IndexError.
    """
    pronunciation_list = pronouncing.phones_for_word(word)
    first_sound = pronunciation_list[0][:1] if pronunciation_list else ""

    if first_sound:
        starts_with_vowel = first_sound in "AEIOU"
    else:
        # Compare against a tuple: an empty string is "in" every string.
        starts_with_vowel = word[:1].lower() in ("a", "e", "i", "o", "u")

    if starts_with_vowel:
        return "an " + word
    return "a " + word
def _rhyming_parts(words):
    """Yield the rhyming parts of each pronunciation for each given word.

    `words` is an iterable of strings.

    This generator yields pairs comprising a word from `words` and a set
    holding the result of `pronouncing.rhyming_part` for each of that
    word's pronunciations. A word with no pronunciation yields an empty set.
    """
    for word in words:
        parts = {pronouncing.rhyming_part(pronunciation)
                 for pronunciation in pronouncing.phones_for_word(word)}
        yield word, parts
def get_random_word(part_of_speech, syllables=None, skip_plurals=False):
    """
    Get this kind of word with this many syllables.
    If none found with this many syllables, any'll do.

    Candidates come from get_random_words_from_wordnik(part_of_speech). The
    first candidate with a pronunciation of exactly ``syllables`` syllables
    is returned. With ``skip_plurals``, words ending in a single "s" are
    passed over during that search; the random fallback is not filtered.

    Raises IndexError when there are no candidates at all.
    """
    words = get_random_words_from_wordnik(part_of_speech)
    for word in words:
        pronunciation_list = pronouncing.phones_for_word(word)
        if not pronunciation_list:
            continue

        # Slices instead of word[-2], so one-letter words cannot raise.
        # The check runs once per word rather than once per pronunciation.
        if skip_plurals and word[-1:].lower() == "s" and word[-2:-1] != "s":
            # Might be plural, just skip it
            print("Plural? Skip!")
            continue

        for pronunciation in pronunciation_list:
            count = pronouncing.syllable_count(pronunciation)
            print(count, word)
            if count == syllables:
                # Bingo!
                return word

    # Any'll do
    return random.choice(words)
wjdata = json.loads(wjson)
wjdata_list = wjdata['results'][0]['captions']

# storage for the selected captions, grouped by syllable count
syllables5 = []
syllables7 = []
syllables23 = []

# Check the first 82 captions for fitting syllables (pronouncing + CMU
# pronouncing dictionary) and add them to the matching list. A shorter list
# is fine: slicing stops at its end.
for text in wjdata_list[:82]:
    try:
        phones = [pronouncing.phones_for_word(p)[0] for p in text.split()]
    except IndexError:
        # skip captions containing a non-indexed word such as <UNK>
        continue
    count = sum(pronouncing.syllable_count(p) for p in phones)
    if count == 5:
        syllables5.append(text)
    elif count == 7:
        syllables7.append(text)
    elif count in (2, 3):
        syllables23.append(text)
def test_phones_for_word(self):
    """Check the number of pronunciations of "conflicts" and the first one."""
    pronunciations = pronouncing.phones_for_word("conflicts")
    first = pronunciations[0]
    self.assertEqual(len(pronunciations), 4)
    self.assertEqual(first, "K AH0 N F L IH1 K T S")
import enchant
import pronouncing

# Spelling suggestions for two contracted forms.
# print is called with one parenthesised argument throughout, so the script
# runs unchanged on Python 2 and Python 3.
dictionary = enchant.request_dict("en_US")
print(dictionary.suggest("untrimm'd"))
print(dictionary.suggest("don't"))

# Pronunciations of "dont" and, if there are any, the stress pattern of the
# first one.
phones = pronouncing.phones_for_word("dont")
print(phones)
if phones:
    first_phone = phones[0]
    stresses = pronouncing.stresses(first_phone)
    print(stresses)
def translate(text):
    """Look up the phones for ``text`` and return split_phone() of them.

    Raises ValueError when the lookup gives nothing.
    """
    phones = phones_for_word(text)
    if phones:
        return split_phone(phones)
    raise ValueError('phone not found')
# NOTE(review): the next three statements use `word`, which is not assigned
# anywhere in this fragment; they presumably belong to a loop over words that
# starts above this excerpt -- confirm the intended nesting.
encoded = word.encode('utf8')
for dm in double_metaphone.dm(encoded):
    if dm:
        metaphone_to_word[dm].add(word)

# Rank every dictionary word by edit distance to WORD, with a bonus of one
# for a shared first letter and one for a shared last letter. Ties are broken
# by difference in length, then alphabetically.
by_distance = []
for word in pronouncing.pronunciations:
    distance = editdistance.eval(word, WORD)
    if word.startswith(WORD[0]):
        distance -= 1
    if word.endswith(WORD[-1]):
        distance -= 1
    character_difference = abs(len(word) - len(WORD))
    by_distance.append((distance, character_difference, word))
by_distance.sort()

# print is called with one parenthesised argument throughout, so the script
# runs unchanged on Python 2 and Python 3.
print(by_distance[:100])
print(min(by_distance))
print(pronouncing.phones_for_word('luteous'))
print(pronouncing.phones_for_word('gluteus'))

possible = set()
for dm in double_metaphone.dm('luteous'):
    if dm:
        # NOTE(review): the key is hard-coded to 'LTS' rather than `dm` --
        # confirm whether that is intentional.
        possible.update(metaphone_to_word['LTS'])
print(possible)
def test_a(self):
    """The word 'a' has exactly these two pronunciations, in this order."""
    expected = ['AH0', 'EY1']
    self.assertEqual(pronouncing.phones_for_word('a'), expected)
def test_phones_for_word_uppercase(self):
    """An upper-case word gives the expected pronunciations."""
    pronunciations = pronouncing.phones_for_word("CONFLICTS")
    first = pronunciations[0]
    self.assertEqual(len(pronunciations), 4)
    self.assertEqual(first, "K AH0 N F L IH1 K T S")
def my(word):
    """Count the syllables of ``word``.

    Uses the first pronunciation when there is one, otherwise falls back to
    syllables_en.count.
    """
    known = pronouncing.phones_for_word(word)
    if not known:
        return syllables_en.count(word)
    return pronouncing.syllable_count(known[0])
def test_a42128(self):
    """'a42128' has no pronunciations."""
    # This is not in the newer cmudict set
    self.assertEqual(pronouncing.phones_for_word('a42128'), [])