Ejemplo n.º 1
0
def test_entries():
    """Check that ``cmudict.entries()`` yields the expected number of entries.

    Raises:
        AssertionError: if the number of entries differs from 135093.
    """
    expected_total = 135093
    actual_total = len(cmudict.entries())
    if actual_total == expected_total:
        return
    message = 'cmudict.entries(): Expected {0} entries, got {1}.'.format(
        expected_total, actual_total)
    raise AssertionError(message)
Ejemplo n.º 2
0
def lookup2_word(word_s):
    """Collect the second field of every cmudict entry matching ``word_s``.

    Each entry returned by ``cmudict.entries()`` is compared on its first
    field; the second field of every exact match is returned, in dictionary
    order.  The list is empty when nothing matches.
    """
    matches = []
    for entry in cmudict.entries():
        if entry[0] == word_s:
            matches.append(entry[1])
    return matches
Ejemplo n.º 3
0
    head = p_word[0]
    value = ''
    for syllable in p_word[1]:
        parts = re.split('(\d+)', syllable)  # split off stress
        for component in parts:
            if component in arpabet.keys():
                value = value + arpabet[component]['ipa']
    return (head, value)


print('loading words...')

# Extra (word, symbols) pairs processed after the cmudict entries.
extras = [
    ('hypotenuse', 'HH AH0 P AA1 T AH N Y UW2 Z'.split(' ')),
    ('quadratical', 'K W AA2 D R AE1 T IH K AH L'.split(' ')),
]

for thisWord in [*cmudict.entries(), *extras]:
    # Only the first instance of a word is stored; later ones are ignored.
    if thisWord[0] not in pronounceDict:
        thisWord = encodeWord(thisWord)
        pronounceDict[thisWord[0]] = thisWord[1]
print()

# Sample text: echoed as typed, then lower-cased.
paragraph = 'demand'
print(paragraph)
print()
paragraph = paragraph.lower()
Ejemplo n.º 4
0
# Find eight-letter words that contain three letter i's but are pronounced
# without an "i" sound.
# -- some preprocessing of the dictionary entries is involved --

# Load the pronouncing dictionary once; the original re-read both the word
# list and the entry list for every candidate word.
cmu_words = cmudict.words()
cmu_entries = cmudict.entries()

# Position of each word's first occurrence, i.e. what list.index() returned.
first_position = {}
for position, cmu_word in enumerate(cmu_words):
    first_position.setdefault(cmu_word, position)

for bar in foo:
    # Keep the text before any "/", then remove the line terminator.  Slicing
    # off two characters whenever "\n" was present removed a real letter from
    # lines that ended in a bare "\n".
    bar = bar.split("/")[0].rstrip("\r\n").lower()
    if len(bar) != 8 or not three_i(bar):
        continue

    # now we look at pronunciation with cmudict...
    position = first_position.get(bar)
    if position is None:
        continue  # cmudict does not list this word
    pronunciation = cmu_entries[position][1]

    # now check each symbol of the pronunciation...
    if any("IH" in syl or "AY" in syl for syl in pronunciation):
        continue

    # finally we want to get rid of names or obscure words by just checking
    # to see if PyDictionary recognizes it...
    meaning = pydict.meaning(bar, disable_errors=True)
    if meaning is not None and meaning != {}:
        print(bar)

foo.close()
Ejemplo n.º 5
0
#Programmer: Mark Morreale
#Goal: Find probability of whether "ie" or "ei" occurs to make the sound IY (ARPABET) in English words

#Use the CMUDict Pronouncing Dictionary to find when words contain the sound IY
#It has the raw data (and entries data) and the sets will be made from those lines,
#excluding extra info from the set

#TODO: add list of all CMUDict words containing the sound IY @markmorreale
#use more methods to work with all the words in the dictionary

#create initial list of all entries regardless of letters and sounds
import cmudict
# Every dictionary entry; entry[0] is the spelling and entry[1] the sequence
# of sounds, as used by the checks below.
init_dict = cmudict.entries()

ie_entries = []
ei_entries = []
IY_entries = []

for entry in init_dict:
    if "ie" in entry[0]:
        ie_entries.append(entry)
    if "ei" in entry[0]:
        ei_entries.append(entry)
    # Record the entry once even when several of its sounds contain "IY";
    # appending once per matching sound listed such words repeatedly and
    # would distort any counts taken from IY_entries.
    if any("IY" in sound for sound in entry[1]):
        IY_entries.append(entry)

#We now have three lists of entries: words spelled with "ie", words spelled
#with "ei", and words whose pronunciation contains the sound IY.
#Statistics can now be computed on those lists.