def test_words():
    """Check that ``cmudict.words()`` returns the expected number of words.

    Raises:
        AssertionError: if the length of ``cmudict.words()`` differs from
            the expected count.
    """
    expected = 135093
    count = len(cmudict.words())
    if count != expected:
        # The message names the function actually under test (words(), not
        # raw()) and the unit actually counted (words, not bytes).
        raise AssertionError(
            'cmudict.words(): Expected {0} words, got {1}.'.format(
                expected, count))
def getPsuedoKeyword(target_phone, already_present):
    """Return the first not-yet-used word whose pronunciation has a given phone.

    Words are scanned in the order given by ``cmudict.words()``. A phone
    matches when it equals ``target_phone`` exactly or, for targets that are
    not a single character, when ``re.search(target_phone, phone)`` finds a
    match (so ``target_phone`` is interpreted as a regular expression there).

    Args:
        target_phone: phone string to look for.
        already_present: dict of words that have already been handed out.
            A word is treated as used when ``already_present.get(word)`` is
            not ``None``. The returned word is recorded in it with value 1.

    Returns:
        The first matching unused word, or ``None`` when nothing matches.
    """
    pronunciations = cmudict.dict()
    words = cmudict.words()
    # Single-character targets only count on an exact match; checking the
    # length before calling re.search also avoids compiling them as regexes.
    allow_partial = len(target_phone) != 1

    for word in words:
        # The "already used" state only changes right before returning, so
        # it is enough to test it once per word instead of once per phone.
        if already_present.get(word) is not None:
            continue
        for phones in pronunciations[word]:
            for phone in phones:
                if target_phone == phone or (
                        allow_partial and re.search(target_phone, phone)):
                    already_present[word] = 1
                    return word
    return None
# Load the CMUdict word list and entries once, and remember the position of
# the first occurrence of every word. This gives the same index that
# cmudict.words().index(word) would, without reloading and linearly scanning
# the whole list for every candidate word.
cmu_words = cmudict.words()
cmu_entries = cmudict.entries()
first_index = {}
for position, cmu_word in enumerate(cmu_words):
    first_index.setdefault(cmu_word, position)

# The context manager guarantees the dictionary file is closed when done.
with open(
    "C:/Users/cdobb/AppData/Local/Programs/Python/Python38-32/Lib/site-packages/enchant/data/mingw32/share/enchant/hunspell/en_US.dic"
) as foo:
    # First we are simply finding words of length 8 with 3 i letters
    # -- some preprocessing of the dictionary entries is involved --
    for bar in foo:
        # Keep only the text before the "/" flag separator. Entries without
        # flags still carry the line terminator, so strip exactly that
        # (slicing off two characters would also drop the word's last letter).
        bar = bar.split("/")[0].rstrip("\r\n").lower()
        if len(bar) == 8 and three_i(bar):
            # now we look at pronunciation with cmudict...
            # -1 marks a word that does not appear in cmudict.words().
            temp = first_index.get(bar, -1)
            if temp >= 0:
                # Second element of the entry at that index: the sequence
                # whose items are checked for "IH" / "AY" below.
                temp = cmu_entries[temp][1]
                # now check each syllable...
                no_i_sound = not any(
                    "IH" in syl or "AY" in syl for syl in temp
                )
                if no_i_sound:
                    # finally we want to get rid of names or obscure words by
                    # just checking to see if PyDictionary recognizes it...
                    meaning = pydict.meaning(bar, disable_errors=True)