/
syllables.py
85 lines (68 loc) · 1.98 KB
/
syllables.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from BeautifulSoup import BeautifulSoup
import urllib
def makePoetrySet(fileName):
    """Load a word list into a set, one entry per line.

    Args:
        fileName: Path of a text file read line by line.

    Returns:
        A set holding every line of the file with leading and trailing
        whitespace stripped.  Duplicate lines collapse to one entry and a
        blank line contributes the empty string.
    """
    # The context manager closes the handle even when reading fails; the
    # handle was previously left open for the garbage collector to reclaim.
    with open(fileName) as wordFile:
        return set(line.strip() for line in wordFile)
def makeSyllableFile(fileName, poetrySet):
    """Write syllable counts for the input words that appear in poetrySet.

    Each input line is stripped and split on backslashes.  The last field is
    used as the part-of-speech tag and the remaining fields, joined without
    a separator, as the word.  For every word found in ``poetrySet`` a line
    ``word<TAB>pos<TAB>count`` is written to ``wordDict.txt`` in the current
    working directory (the file is truncated first).

    Args:
        fileName: Path of the backslash-delimited input file.
        poetrySet: Collection of words to keep; only membership tests
            (``word in poetrySet``) are performed on it.

    Raises:
        Whatever ``get_syllables`` raises for a word propagates unchanged;
        both files are closed before the exception leaves this function.
    """
    # Nested ``with`` blocks (rather than one combined statement) keep this
    # valid on older Python 2 interpreters while guaranteeing both handles
    # are closed if a lookup fails part-way through.
    with open(fileName) as wordFile:
        with open('wordDict.txt', 'w') as newFile:
            for line in wordFile:
                fields = line.strip().split("\\")
                pos = fields[-1]
                word = ''.join(fields[:-1])
                # A line without any backslash yields an empty word.  Skip
                # it so a blank entry in poetrySet cannot trigger a lookup
                # for '' and emit a record with no word.
                if not word:
                    continue
                if word not in poetrySet:
                    continue
                sCount = get_syllables(word)
                # A reported count of zero is recorded as one syllable.
                if sCount == 0:
                    sCount = 1
                newFile.write(word + '\t' + pos + '\t' + str(sCount) + '\n')
def make_syllables(fileName, poetrySet):
    """Write a syllable count for the first token of every input line.

    For each non-blank line of ``fileName`` the first whitespace-separated
    token is looked up with ``get_syllables`` and a record
    ``word<TAB>count`` is written to ``verbDict.txt`` in the current working
    directory (the file is truncated first).  Every record is followed by
    one empty line.

    Args:
        fileName: Path of the input file.
        poetrySet: Not used by this function; kept so the signature stays
            compatible with existing callers.

    Raises:
        Whatever ``get_syllables`` raises for a word propagates unchanged;
        both files are closed before the exception leaves this function.
    """
    with open(fileName) as wordFile:
        with open('verbDict.txt', 'w') as newFile:
            for line in wordFile:
                tokens = line.split()
                # Blank or whitespace-only lines have no first token;
                # indexing them used to raise IndexError.
                if not tokens:
                    continue
                word = tokens[0]
                sCount = get_syllables(word)
                # Two newlines on purpose: the record already ended in '\n'
                # and the former ``print >>`` appended another, so the
                # output format (a blank line after each record) is kept.
                newFile.write(word + '\t' + str(sCount) + '\n\n')
def get_syllables(word):
    """Fetch the syllable count for ``word`` from the wordcalc.com web form.

    The text is POSTed to the service and the returned HTML is scraped: the
    table following the single ``<h3>Statistics</h3>`` heading is searched
    for the single cell whose text is ``Syllable Count``, and the cell after
    it is parsed as an integer.

    Args:
        word: Text submitted as the ``text`` form field.

    Returns:
        The integer parsed from the value cell.

    Raises:
        Exception: If the page does not contain exactly one
            ``<h3>Statistics</h3>`` heading or exactly one
            ``Syllable Count`` cell, or if the table or value cell that
            should follow them is missing.
        ValueError: If the value cell's text is not an integer literal
            (raised by ``int``).
    """
    url = 'http://www.wordcalc.com/index.php'
    post_data = urllib.urlencode({'text': word})
    # The two option names are appended as bare, value-less form fields.
    post_data = '%s&optionSyllableCount&optionWordCount' % post_data

    cnxn = urllib.urlopen(url, post_data)
    try:
        response = cnxn.read()
    finally:
        # Release the connection even when the read fails.
        cnxn.close()

    soup = BeautifulSoup(response)
    h3_matches = [h3 for h3 in soup.findAll('h3') if h3.text == 'Statistics']
    if len(h3_matches) != 1:
        raise Exception('Wrong number of <h3>Statistics</h3>')

    # findNextSibling returns None when no such sibling exists; report that
    # explicitly instead of failing later with an AttributeError on None.
    table = h3_matches[0].findNextSibling('table')
    if table is None:
        raise Exception('No <table> after <h3>Statistics</h3>')

    td_matches = [td for td in table.findAll('td')
                  if td.text == 'Syllable Count']
    if len(td_matches) != 1:
        raise Exception('Wrong number of <td>Syllable Count</td>')

    td_value = td_matches[0].findNextSibling('td')
    if td_value is None:
        raise Exception('No value <td> after <td>Syllable Count</td>')

    return int(td_value.text)
def main():
    """Run the pipeline: load 'poeticWords.txt', then process 'mobypos.txt'.

    The set returned by makePoetrySet is handed to makeSyllableFile together
    with the 'mobypos.txt' path.
    """
    poetic_words = makePoetrySet('poeticWords.txt')
    makeSyllableFile('mobypos.txt', poetic_words)


# Called unconditionally at module level, so the pipeline also runs when
# this file is imported rather than executed directly.
main()