-
Notifications
You must be signed in to change notification settings - Fork 0
/
forward.py
126 lines (76 loc) · 2.06 KB
/
forward.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import numpy as np
from logsum import log_sum
import sys
devfile = open(sys.argv[1], "r")
devfile = devfile.readlines()
devfile = map(lambda s: s.strip(), devfile)
transfile = open(sys.argv[2], "r")
transfile = transfile.readlines()
transfile = map(lambda s: s.strip(), transfile)
emitfile = open(sys.argv[3], "r")
emitfile = emitfile.readlines()
emitfile = map(lambda s: s.strip(), emitfile)
priorfile = open(sys.argv[4], "r")
priorfile = priorfile.readlines()
priorfile = map(lambda s: s.strip(), priorfile)
def transMatrix():
trans = []
for line in transfile:
t = []
line = line.split(" ")
#print line
for i in range(1, len(line)):
prob = line[i].split(":")[1]
t.append(float(prob))
trans.append(t)
return np.array(trans)
def priorMatrix():
prior = []
for line in priorfile:
line = line.split(" ")
prior.append(float(line[1]))
return np.array(prior).reshape(8,1)
def emitMatrix():
d = {}
for i in range(0, len(emitfile)):
line = emitfile[i]
line = line.split(" ")
#print line[791].split(":")[1]
for j in range(1, len(line)):
if i == 0:
d[line[j].split(":")[0]] = []
prob = line[j].split(":")[1]
d[line[j].split(":")[0]].append(float(prob))
return d
def forward():
emit = emitMatrix()
trans = transMatrix()
prior = priorMatrix()
for line in devfile:
line = line.split(" ")
o = line[0]
probMatrix = np.log(prior) + np.log(np.array(emit[o]).reshape(8,1))
# loop throgh words in line
#print probMatrix
#sys.exit(0)
for z in range(1, len(line)):
o = line[z]
emitprob = np.array(emit[o]).reshape(8,1)
# loop through each t+1 state
m = []
for j in range (0, 8):
temp = np.log(trans[:,j].reshape(8,1)) + (probMatrix)
sum = temp[0]
# sum up probs
for k in range(1, 8):
sum = log_sum(sum, temp[k])
prob = np.log(emitprob[j]) + sum
m.append(prob[0])
probMatrix = np.array(m).reshape(8,1)
#print probMatrix
#sys.exit(0)
total = probMatrix[0]
for h in range(1, 8):
total = log_sum(total, probMatrix[h])
print total[0]
forward()