# textGenExamples.py — forked from madirey/pynlg
# 78 lines (64 loc), 3.03 KB
# Make the local capstone libraries importable before loading textGenerator.
import sys
sys.path.append('/home/mcaldwell/capstone/libs')
# Star import brings in the model classes, TextGenerator, and RegexpTokenizer
# used below — TODO confirm RegexpTokenizer is re-exported by textGenerator.
from textGenerator import *
from nltk.corpus import brown
# Plain-text corpora (Windows paths from the original development machine).
ALICE = \
"c:\\documents and settings\\matt\\desktop\\Capstone\\corpora\\alice.txt"
TWENTY_THOUSAND_LEAGUES = \
"c:\\documents and settings\\matt\\desktop\\Capstone\\corpora\\twentyThousandLeagues.txt"
# Corpus, seed word, and output length used by the showExamples() demo.
FILE_NAME = ALICE
INITIAL_WORD = "alice"
LENGTH = 100
#tokenizer = WhitespaceTokenizer()
# Token pattern: words | dollar amounts | runs of other non-space symbols |
# ellipses/dot runs | single punctuation marks | Mr./Ms./Mrs./Dr. abbreviations.
regexp = r'(\w+)|(\$\d+\.\d+)|([^\w\s]+)|(\.+)|([\.,:;\"\`!()?])|(([Mm]r|[Mm]s|[Mm]rs|[Dd]r)\.)'
tokenizer = RegexpTokenizer(regexp)
def showExamples():
print "UnWeightedProbabilityModel...\n"
model = UnWeightedProbabilityModel(FILE_NAME, tokenizer)
TextGenerator(model).generateWords(INITIAL_WORD, LENGTH)
print "\n\nWeightedProbabilityModel...\n"
model = WeightedProbabilityModel(FILE_NAME, tokenizer)
TextGenerator(model).generateWords(INITIAL_WORD, LENGTH)
print "\n\nUnWeightedMutualInformationModel...\n"
model = UnWeightedMutualInformationModel(FILE_NAME, tokenizer)
TextGenerator(model).generateWords(INITIAL_WORD, LENGTH)
print "\n\nWeightedMutualInformationModel...\n"
model = WeightedMutualInformationModel(FILE_NAME, tokenizer)
TextGenerator(model).generateWords(INITIAL_WORD, LENGTH)
print "\n\nUnWeightedTScoreModel...\n"
model = UnWeightedTScoreModel(FILE_NAME, tokenizer)
TextGenerator(model).generateWords(INITIAL_WORD, LENGTH)
print "\n\nWeightedTScoreModel...\n"
model = WeightedTScoreModel(FILE_NAME, tokenizer)
TextGenerator(model).generateWords(INITIAL_WORD, LENGTH)
def testLengthOfText(brownSection):
items = brown.items(brownSection)
textToken = brown.read(items[0])
for i in range(len(items)):
print "\n\nWeightedTaggedTScoreModel\n"
model = WeightedTaggedTScoreModel(textToken, SUBTOKENS='WORDS')
TextGenerator(model).generateWords(('the','at'),100)
textToken['WORDS'] = textToken['WORDS'] + brown.read(items[i+1])['WORDS']
def main():
print "\n\nWeightedTaggedTScoreModel\n"
items = brown.items('fiction: general')
textToken = brown.read(items[0])
for item in items[1:]:
textToken['WORDS'] = textToken['WORDS'] + brown.read(item)['WORDS']
model = WeightedTaggedTScoreModel(textToken, SUBTOKENS='WORDS')
TextGenerator(model).generateWords(('the','at'), 200)
print "\n\nWeightedTScoreModel...\n"
model = WeightedTScoreModel(textToken, SUBTOKENS='WORDS')
TextGenerator(model).generateWords('the',200)
# Earlier experiments with the genesis corpus, kept for reference.
# from nltk.corpus import genesis
# textToken = genesis.read('english-kjv.txt')
# #model = UnWeightedProbabilityModel(textToken, SUBTOKENS='WORDS')
# #TextGenerator(model).generateWords('God', 100)
# model = WeightedProbabilityModel(textToken, SUBTOKENS = 'WORDS')
# TextGenerator(model).generateWords('God', 100)
# print "\n"
# model = WeightedTScoreModel(textToken, SUBTOKENS='WORDS')
# TextGenerator(model).generateWords('the', 100)

# Guarded entry point: the original ran testLengthOfText at module level,
# so merely importing this file kicked off corpus reads and generation.
if __name__ == '__main__':
    #main()
    testLengthOfText('fiction: general')
    #testVerySmallTexts('fiction: general')