-
Notifications
You must be signed in to change notification settings - Fork 0
/
wordutil.py
64 lines (57 loc) · 1.58 KB
/
wordutil.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from curses.ascii import isdigit
import nltk
from nltk.corpus import cmudict
from nltk.tag.simplify import simplify_wsj_tag
from db import init
from db import Word
def __syllables__(word):
print "Doing syllables lookup for", word
d = cmudict.dict()
if word == '':
return 0
return [len(list(y for y in x if isdigit(y[-1]))) for x in d[word]][0]
def __frequency__(word):
print "Doing frequency lookup for", word
minfreq = 10**-6 # Right now, we have no really good method here...
return minfreq
# Expensive contextless word PoS lookup...
def __part_of_speech__(word):
    """Tag ``word`` in isolation and return its simplified tag.

    The word is passed to ``nltk.pos_tag`` as a one-token sentence, so the
    tagger has no surrounding context to work with. The tag of that single
    token is then passed through ``simplify_wsj_tag`` and returned.
    """
    print("%s %s" % ("Doing POS lookup for", word))
    first_pair = nltk.pos_tag([word])[0]
    # Each tagged entry is indexed as (token, tag); only the tag is needed.
    wsj_tag = first_pair[1]
    return simplify_wsj_tag(wsj_tag)
# Lower-cased words whose syllable lookup has already failed in this process;
# syllables() rejects them immediately instead of repeating the lookup.
no_syllables_data = set()


def syllables(word):
    """Return the syllable count for ``word``, storing it in the word store.

    The word is lower-cased, then looked up through the object returned by
    ``init('word')`` (presumably a SQLAlchemy-style session -- confirm in
    ``db``). A stored, truthy count is returned directly; otherwise the
    count is computed with ``__syllables__``, saved on the ``Word`` record
    and committed.

    :param word: the word to look up (any case).
    :returns: the syllable count held on the ``Word`` record.
    :raises Exception: with the message "Couldn't find syllables count for
        <word>" when the lookup fails now or has failed earlier in this
        process.
    """
    word = word.lower()
    error_message = "Couldn't find syllables count for " + word
    if word in no_syllables_data:
        raise Exception(error_message)
    db = init('word')
    w = db.query(Word).get(word)
    # NOTE(review): truthiness means a stored count of 0 is treated as
    # missing and looked up again on every call.
    if w and w.syllables:
        return w.syllables
    # If we get here, we're going to have to make a call to __syllables__
    try:
        count = __syllables__(word)
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit propagate instead of being recorded as "no data".
        no_syllables_data.add(word)
        print("%s %s %s" % ("Lookup for", word, "failed."))
        raise Exception(error_message)
    # Create the record only after the lookup succeeded, so a failed lookup
    # does not leave an uncommitted Word pending on the session.
    if not w:
        w = Word()
        w.word = word
        db.add(w)
    w.syllables = count
    db.commit()
    return w.syllables
def frequency(word):
    """Return the frequency for ``word``, storing it in the word store.

    The word is lower-cased and looked up through the object returned by
    ``init('word')``. If a record exists with a truthy frequency, that value
    is returned. Otherwise a record is created when missing, its frequency
    is filled in from ``__frequency__``, and the change is committed.
    """
    key = word.lower()
    session = init('word')
    entry = session.query(Word).get(key)
    if not entry:
        # First time this word is seen: register a fresh record for it.
        entry = Word()
        entry.word = key
        session.add(entry)
    elif entry.frequency:
        return entry.frequency
    # If we get here, we're going to have to make a call to __frequency__
    entry.frequency = __frequency__(key)
    session.commit()
    return entry.frequency