-
Notifications
You must be signed in to change notification settings - Fork 0
/
hmm_alpha2.py
99 lines (88 loc) · 2.09 KB
/
hmm_alpha2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import sys
import math
import logsum
import copy
def _read_lines(path):
    """Return the non-blank lines of *path* with their line endings removed."""
    # Text mode: every line is parsed afterwards with str operations
    # (split(), slicing, ":" lookup).
    with open(path, "r") as handle:
        # Strip only the terminator so that a final line lacking a trailing
        # newline keeps its last character (slicing with [:-1] would eat it).
        # Blank lines carry no data and would only make the parsers fail.
        return [line.rstrip("\r\n") for line in handle if line.strip()]


def main(fileName=None, fileNameSec=None, emitFile=None, priorFile=None):
    """Load an HMM and print the forward log-likelihood of every sentence.

    Any path left as ``None`` is taken from the command line, so both
    ``main()`` and ``main(a, b, c, d)`` work.

    Args:
        fileName: file with one whitespace-tokenised sentence per line
            (default ``sys.argv[1]``).
        fileNameSec: transition file, parsed by ``initTran``
            (default ``sys.argv[2]``).
        emitFile: emission file, parsed by ``initEmit``
            (default ``sys.argv[3]``).
        priorFile: prior file, parsed by ``initState``
            (default ``sys.argv[4]``).

    Raises:
        IndexError: a path was omitted and the matching ``sys.argv`` entry
            does not exist.
        IOError / OSError: a file cannot be opened.
    """
    if fileName is None:
        fileName = sys.argv[1]
    if fileNameSec is None:
        fileNameSec = sys.argv[2]
    if emitFile is None:
        emitFile = sys.argv[3]
    if priorFile is None:
        priorFile = sys.argv[4]

    # Same read order as before: sentences, transitions, emissions, priors.
    sentences = _read_lines(fileName)
    transList = _read_lines(fileNameSec)
    emitList = _read_lines(emitFile)
    priorList = _read_lines(priorFile)

    initDict = initState(priorList)
    transDict = initTran(transList)
    emitDict = initEmit(emitList)

    for sent in sentences:
        train(initDict, transDict, emitDict, sent)
def initState(priorList):
    """Build a ``{state: log-probability}`` table from fixed-width entries.

    For every string in *priorList* the first two characters are the key,
    the third character is skipped, and the remainder is parsed as a float
    whose natural logarithm is stored.  A repeated key keeps the value of
    its last occurrence.

    Raises:
        ValueError: the remainder is not a float, or it is not positive
            (``math.log`` rejects it).
    """
    return {
        entry[0:2]: math.log(float(entry[3:]))
        for entry in priorList
    }
def initTran(transList):
    """Build the nested transition table ``{source: {target: log-prob}}``.

    Each line is split on whitespace.  The first field names the source
    state; the remaining fields are handed to ``initState``, which turns
    them into the inner ``{target: log-probability}`` mapping.

    Raises:
        IndexError: a line contains no fields.
    """
    table = {}
    for line in transList:
        fields = line.split()
        table[fields[0]] = initState(fields[1:])
    return table
def initEmit(emitList):
    """Build the nested emission table ``{state: {symbol: log-prob}}``.

    Each line is split on whitespace.  The first field names the state and
    every remaining field has the form ``symbol:probability``; the natural
    logarithm of the probability is stored under the symbol.

    Args:
        emitList: iterable of emission lines.

    Returns:
        dict mapping each state to a dict of symbol -> ``math.log(prob)``.

    Raises:
        IndexError: a line contains no fields.
        ValueError: a field has no ":", its probability is not a float, or
            the probability is not positive (``math.log`` rejects it).
    """
    table = {}
    for line in emitList:
        fields = line.split()
        emissions = {}
        for field in fields[1:]:
            # Split at the LAST colon: the symbol may itself contain ":"
            # (e.g. the token ":" or "10:30"), the probability never does.
            cut = field.rindex(":")
            emissions[field[:cut]] = math.log(float(field[cut + 1:]))
        table[fields[0]] = emissions
    return table
def train(initDict,transDict,emitDict,sentence):
    """Print and return the forward-algorithm log-likelihood of a sentence.

    All tables hold natural-log probabilities.  Sums of probabilities are
    delegated to ``logsum.log_sum`` (presumably log(exp(a) + exp(b)) --
    confirm against the logsum module).

    Args:
        initDict: ``{state: log prior}``.  Not modified.
        transDict: ``{previous state: {next state: log transition prob}}``.
        emitDict: ``{state: {word: log emission prob}}``.
        sentence: whitespace-separated observation string.

    Returns:
        The value that is printed: the log of the summed final forward
        variables, or ``-inf`` when *initDict* has no states.

    Raises:
        IndexError: *sentence* contains no words.
        KeyError: a state, transition or word is missing from a table.
    """
    words = sentence.split()

    # Initialisation: alpha_1(s) = prior(s) + emission(s, w_1).
    alpha = {}
    for state in initDict:
        alpha[state] = initDict[state] + emitDict[state][words[0]]

    # Induction.  Every step reads the PREVIOUS alpha; the first step must
    # therefore read alpha_1 above, not the bare priors.
    for word in words[1:]:
        nextAlpha = {}
        for state in alpha:
            # None marks "nothing accumulated yet"; 0 cannot be used for
            # that because 0 is a valid log-probability (p == 1).
            incoming = None
            for prev in alpha:
                term = alpha[prev] + transDict[prev][state]
                if incoming is None:
                    incoming = term
                else:
                    incoming = logsum.log_sum(incoming, term)
            nextAlpha[state] = incoming + emitDict[state][word]
        alpha = nextAlpha

    # Termination: sum the final forward variables over all states.
    totalP = None
    for state in alpha:
        if totalP is None:
            totalP = alpha[state]
        else:
            totalP = logsum.log_sum(totalP, alpha[state])
    if totalP is None:
        # No states at all: the probability is zero.
        totalP = float("-inf")

    # Parenthesised so the line is valid on both Python 2 and Python 3.
    print(totalP)
    return totalP
# Run only when executed as a script, not when the module is imported.
if __name__ == "__main__":
    if len(sys.argv) < 5:
        sys.exit("usage: %s SENTENCES TRANSITIONS EMISSIONS PRIORS" % sys.argv[0])
    # main() declares four path parameters; pass them explicitly instead of
    # calling it with no arguments.
    main(*sys.argv[1:5])