def test_entries():
    """Check that ``cmudict.entries()`` returns the expected number of entries.

    Raises:
        AssertionError: if ``len(cmudict.entries())`` is not 135093. The
            message reports both the expected and the actual count.
    """
    expected_total = 135093
    actual_total = len(cmudict.entries())

    # Guard clause: nothing to report when the counts agree.
    if actual_total == expected_total:
        return

    message = 'cmudict.entries(): Expected {0} entries, got {1}.'.format(
        expected_total, actual_total)
    raise AssertionError(message)
def lookup2_word(word_s):
    """Collect every pronunciation that ``cmudict.entries()`` lists for a word.

    Args:
        word_s: The word to look up; compared with ``==`` against the first
            element of each entry.

    Returns:
        A list holding the second element of every matching entry, in the
        order the entries appear. Empty when no entry matches.
    """
    pronunciations = []
    for entry in cmudict.entries():
        if entry[0] == word_s:
            pronunciations.append(entry[1])
    return pronunciations
# NOTE(review): the statements down to `return (head, value)` are the tail of
# a function whose `def` line is outside this view. The call further down
# suggests it is `encodeWord(p_word)` -- confirm against the full file.
    # p_word is indexed as (word, sequence of phoneme tokens).
    head = p_word[0]
    value = ''
    for syllable in p_word[1]:
        # The capturing group makes re.split keep the digit run as its own
        # list item, so the phoneme symbol and its stress number are separated.
        parts = re.split('(\d+)', syllable) # split off stress
        for component in parts:
            # Only pieces that are keys of `arpabet` contribute; anything else
            # (empty strings, and presumably the stress digits) is skipped.
            if component in arpabet.keys():
                value = value + arpabet[component]['ipa']
    # Result pairs the original word with the concatenated 'ipa' values.
    return (head, value)

print('loading words...')
# Hand-written entries in the same (word, phoneme list) shape as the
# cmudict entries. They are appended after the cmudict entries, so they are
# only used when cmudict has no entry for the same word (see `continue` below).
extras = [('hypotenuse', 'HH AH0 P AA1 T AH N Y UW2 Z'.split(' ')),
          ('quadratical', 'K W AA2 D R AE1 T IH K AH L'.split(' '))]
# Fill pronounceDict (defined outside this view) with word -> encoded value.
for thisWord in list(cmudict.entries()) + extras:
    if thisWord[0] in pronounceDict.keys():
        continue # keep only the first instance
    # encodeWord returns a (word, encoded string) pair -- presumably the
    # function whose tail appears above; confirm.
    thisWord = encodeWord(thisWord)
    pronounceDict[thisWord[0]] = thisWord[1]
print()
#print('finished loading...')
# Hard-coded sample input, echoed and then lower-cased for later processing.
paragraph = 'demand'
print(paragraph)
print()
paragraph = paragraph.lower()
# First we are simply finding words of length 8 with 3 i letters
# -- some preprocessing of the dictionary entries is involved --

# Index CMUdict once, keeping the FIRST pronunciation listed for each word.
# The previous code called cmudict.words().index(...) and cmudict.entries()
# for every candidate, re-reading and linearly scanning the whole dictionary
# each time; `setdefault` preserves the same "first match wins" result.
first_pronunciation = {}
for entry in cmudict.entries():
    first_pronunciation.setdefault(entry[0], entry[1])

# `foo` is opened outside this view; it is iterated item by item and closed
# at the end -- presumably a text file with one dictionary entry per line.
for bar in foo:
    # Keep only the part before the first "/" (e.g. drops affix flags).
    bar = bar.split("/")[0]
    if "\n" in bar:
        # NOTE(review): this removes TWO trailing characters, not just the
        # newline -- presumably the input has "\r\n" endings; confirm.
        bar = bar[:-2]
    bar = bar.lower()

    # Candidate filter: exactly 8 characters and accepted by three_i().
    if len(bar) != 8 or not three_i(bar):
        continue

    # now we look at pronunciation with cmudict...
    phones = first_pronunciation.get(bar)
    if phones is None:
        continue  # word is not in CMUdict

    # now check each syllable: reject the word if any phoneme token
    # contains "IH" or "AY".
    if any("IH" in syl or "AY" in syl for syl in phones):
        continue

    # finally we want to get rid of names or obscure words by just checking
    # to see if PyDictionary recognizes it...
    meaning = pydict.meaning(bar, disable_errors=True)
    if meaning is not None and meaning != {}:
        print(bar)
foo.close()
# Programmer: Mark Morreale
# Goal: Find the probability that "ie" or "ei" is the spelling used when the
#       sound IY (ARPABET) occurs in English words.
# Use the CMUDict Pronouncing Dictionary to find which words contain the
# sound IY. It has the raw data (and entries data); the collections below are
# built from those entries, excluding extra info.
# TODO: add list of all CMUDict words containing the sound IY @markmorreale
#       use more methods to work with all the words in the dictionary

# Create the initial list of all entries regardless of letters and sounds.
import cmudict

init_dict = cmudict.entries()

ie_entries = []  # entries whose spelling contains "ie"
ei_entries = []  # entries whose spelling contains "ei"
IY_entries = []  # entries whose pronunciation contains an IY phoneme

for entry in init_dict:
    # entry[0] is the spelling, entry[1] the sequence of phoneme tokens.
    if "ie" in entry[0]:
        ie_entries.append(entry)
    if "ei" in entry[0]:
        ei_entries.append(entry)
    # Record each entry at most once. Appending inside a per-phoneme loop
    # duplicated words with several IY sounds and would skew the counts.
    if any("IY" in sound for sound in entry[1]):
        IY_entries.append(entry)

# We now have three collections of entries that contain the letters or sounds
# we are investigating; statistics can be computed on them.