# File: hcrf.py
# Author: Dirko Coetsee
# Date: 13 August 2013
# A script to train and test an HCRF for sparse input vectors.
#
# TODO: - Add support for more than one feature on each time step.
# - Add feature weights.
# - Change inference to use more efficient matrix routines.
from numpy import array, zeros
from numpy import exp, log, inf
from random import random, seed
import sys
from scipy.optimize import fmin_l_bfgs_b
class hcrf:
"""
Represent the HCRF model.
Includes methods for training using LM-BFGS, scoring, and testing, and
helper methods for loading and saving parameter values to and from file.
"""
def reset_tables(self):
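        # A: forward table, B: backward table, C: per-step marginals
        # (A * B, normalized), and D: pairwise marginals over consecutive
        # hidden states, all indexed by time step, hidden state(s), and class.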
T = self.T
S = self.S
W = self.W
self.A = zeros((T, S, W))
self.B = zeros((T, S, W))
self.C = zeros((T, S, W))
self.D = zeros((T, S, S, W))
def reset_deriv(self):
self.der = zeros(len(self.param))
self.ll = 0.0
def load_example(self, x):
# Fill forward, backward, and combination tables for example x
self.T = len(x)
self.reset_tables()
self.x = x
self.fill_A()
self.fill_B()
self.fill_C()
# Return the predicted distribution over classes
        return [sum(self.C[self.T - 1, :, y]) for y in range(self.W)]
def get_deriv(self, x, y):
# Return the log-likelihood and derivative of the parameters for
# a training example with features x and label y.
self.load_example(x)
self.fill_D()
C = self.C
D = self.D
T = self.T
W = self.W
S = self.S
der = self.der
ll = log(sum(C[T - 1, :, y]))
self.ll += ll
        # Factors involving a single hidden variable
        for t in range(T):
            E_ef_norm = sum(C[t, :, y])
            for w in range(W):
                for s in range(S):
                    E_f = C[t, s, w]
                    E_ef = C[t, s, w]
                    if w != y:
                        E_ef = 0
                    f_list = self.get_fs(t, s, w)
                    E_ef_n = E_ef / E_ef_norm
                    for f in f_list:
                        der[f] += E_ef_n - E_f
        # Factors linking pairs of consecutive hidden variables
        for t in range(T - 1):
            E_ef_norm = sum(sum(D[t, :, :, y]))
            for w in range(W):
                for s in range(S):
                    for ps in range(S):
                        E_f = D[t, s, ps, w]
                        E_ef = D[t, s, ps, w]
                        if w != y:
                            E_ef = 0
                        f_list = self.get_fss(t, s, ps, w)
                        E_ef_n = E_ef / E_ef_norm
                        for f in f_list:
                            der[f] += E_ef_n - E_f
return ll, der
def fill_A(self):
# Fill the forward table.
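        # A[t, s, w] accumulates the (scaled) score of all hidden-state
        # paths that end in state s at time t for class w; each time
        # slice is normalized to guard against numerical underflow.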
T = self.T
S = self.S
W = self.W
A = self.A
        for w in range(W):
            A[0, 0, w] = exp(self.get_f_single(0, 0, w))
        for t in range(1, T):
            for w in range(W):
                for s in range(S):
                    for ps in range(S):
                        A[t, s, w] += A[t - 1, ps, w] * exp(self.get_f(t, s, ps, w))
            norm = sum(sum(A[t, :, :]))
            A[t, :, :] /= norm
def fill_B(self):
# Fill the backward table.
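        # B[t, s, w] accumulates the (scaled) mass of all paths that
        # complete the sequence from state s at time t; like fill_A, each
        # time slice is normalized to keep values in range.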
T = self.T
S = self.S
W = self.W
B = self.B
        for w in range(W):
            B[T - 1, S - 1, w] = exp(self.get_f_single(T - 1, S - 1, w))
        for t in range(T - 2, -1, -1):
            for w in range(W):
                for s in range(S):
                    for ps in range(S):
                        B[t, ps, w] += B[t + 1, s, w] * exp(self.get_f(t + 1, s, ps, w))
            norm = sum(sum(B[t, :, :]))
            B[t, :, :] /= norm
def fill_C(self):
# Fill and normalize table with product of forward and backward tables.
self.C = self.A * self.B
        for t in range(self.T):
            norm = sum(sum(self.C[t, :, :]))
            self.C[t, :, :] = self.C[t, :, :] / norm
def fill_D(self):
        # Fill the table of joint marginals over pairs of consecutive hidden states.
T = self.T
S = self.S
W = self.W
A = self.A
B = self.B
D = self.D
        for t in range(T - 1):
            for w in range(W):
                for s in range(S):
                    for ps in range(S):
                        D[t, s, ps, w] = A[t, ps, w] * B[t + 1, s, w] * exp(self.get_f(t + 1, s, ps, w))
            norm = sum(sum(sum(D[t, :, :, :])))
            D[t, :, :, :] /= norm
def get_f(self, t, p, ps, w):
        # Sum the parameters active for a transition cell of the forward or backward table.
f_list = self.get_fs(t, p, w)
f_list += self.get_fss(t, p, ps, w)
return sum(self.param[f] for f in f_list)
def get_f_single(self, t, p, w):
# Potential of non-interaction cell
f_list = self.get_fs(t, p, w)
return sum(self.param[f] for f in f_list)
def get_fs(self, t, p, w):
# Get list of parameters activated at time t, hidden variable p,
# and output variable w.
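        # Parameter layout (set up in __init__): indices [0, S * S) hold
        # the transition weights param[p * S + ps]; the weight for feature
        # f with hidden state p and class w lives at S * S + W * S * f + p * W + w.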
S = self.S
W = self.W
        flist = []
        for f in self.x[t]:
            flist.append(S * S + W * S * f + p * W + w)
        return flist
def get_fss(self, t, p, ps, w):
# Get list of parameters activated at time t, hidden variable p,
# previous hidden variable ps, and output variable w.
S = self.S
return [p * S + ps]
def train_lmbfgs(self):
"""
Train the model by maximising posterior with LM-BFGS.
The training data should have been set at this stage:
>> h = hcrf(H, maxw, maxf)
>> h.X = X
>> h.Y = Y
>> h.lamb = lamb
>> final_params = h.train_lmbfgs()
Return the final parameter vector.
"""
initial = self.param[self.param_non_inf_indexes]
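        # fmin_l_bfgs_b minimizes an objective that returns both the
        # function value and its gradient; get_obj provides exactly that.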
fparam = fmin_l_bfgs_b(self.get_obj, initial)
return fparam
def get_obj(self, npar, *args):
# Use get_deriv to find the objective and its derivative for training.
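        # The optimizer minimizes, so the penalized log-likelihood and its
        # gradient are negated below.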
self.param[self.param_non_inf_indexes] = npar
self.reset_deriv()
for x, y in zip(self.X, self.Y):
self.get_deriv(x, y)
der = self.der[self.param_non_inf_indexes]
        ll = -self.ll + 0.5 * self.lamb * sum(xx ** 2.0 for xx in npar)
        der = -der + self.lamb * npar
        # Print the log-likelihood to monitor training progress.
        print(self.ll)
return ll, der
def test(self):
"""
Test the current input data on the current model.
Prints a confusion matrix, the number of correctly labeled examples,
the total number of examples, and accuracy to standard out.
"""
total = 0
cor = 0
conf = zeros((self.W, self.W))
for x, y in zip(self.X, self.Y):
pred = self.load_example(x)
            py = max(range(len(pred)), key=lambda i: pred[i])
if py == y:
cor += 1
conf[y, py] += 1
total += 1
        print(conf)
        print(cor, total, cor * 1.0 / total)
def label(self):
"""
Predict the labels of current input data and print to standard out.
"""
        for x in self.X:
            pred = self.load_example(x)
            py = max(range(len(pred)), key=lambda i: pred[i])
            print(py)
def load_params(self, filename):
"""
        Read parameter values from a file and set them as the current parameters.
        """
        with open(filename, 'r') as f:
            p = array([float(line.strip("\n")) for line in f])
        self.param[self.param_non_inf_indexes] = p
def save_params(self, filename):
"""
        Write the current parameters to a file.
        """
        with open(filename, 'w') as f:
            for p in self.param[self.param_non_inf_indexes]:
                f.write(str(p) + "\n")
def __init__(self, S, W, F):
"""
        Initialize a new HCRF object with hidden units of cardinality S,
        output units that can take on W values, and binary indicator
        features numbered 0 to F - 1.
"""
self.S = S
self.W = W
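        # Fix the random seed so parameter initialization is reproducible.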
seed(2)
        # Initialize the parameters uniformly at random on [0, 0.1).
        self.param = array([random() * 0.1 for i in range(S * S + S * W * F)])
        for s in range(S):
            for ps in range(S):
# Make it impossible for hidden units to remain in state 0.
if ps == 0 and s == 0:
self.param[s * S + ps] = -inf
# Hidden units can also not stay in last state S-1.
if ps == S - 1 and s == S - 1:
self.param[s * S + ps] = -inf
# Hidden states can only go to higher state or stay in the
# same state.
if s < ps:
self.param[s * S + ps] = -inf
        self.param_non_inf_indexes = [i for i in range(len(self.param)) if self.param[i] != -inf]
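# A minimal end-to-end sketch (hypothetical toy data: three hidden
# states, two classes, and features 0-4, with 1 and 2 as the special
# start/end features):
#
#   X = [[[1], [3], [2]], [[1], [4], [2]]]
#   Y = [0, 1]
#   h = hcrf(3, 2, 5)
#   h.X, h.Y, h.lamb = X, Y, 1.0
#   weights, objective, info = h.train_lmbfgs()
#   h.param[h.param_non_inf_indexes] = weights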
def load_data(filename):
"""
Load training or testing data from file.
    Return input vectors X, labels Y, the cardinality of the features in X,
    and the cardinality of the labels in Y.
"""
    X = []
    Y = []
    maxf = 0
    maxw = 0
    with open(filename, "r") as f:
        for line in f:
            toks = [int(tok) for tok in line.strip(" \n").split(" ")]
            y = toks[0]
            if y > maxw:
                maxw = y
            x = [[tok] for tok in toks[1:]]
            for t in toks[1:]:
                if t > maxf:
                    maxf = t
            Y += [y]
            X += [x]
    return X, Y, maxf + 1, maxw + 1
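# Example data file line (hypothetical): "0 1 5 7 2" encodes a datapoint
# of class 0 whose time steps carry feature 1 (start), features 5 and 7,
# and feature 2 (end).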
if __name__ == "__main__":
usage = """
Train, test, or use an HCRF to label input data.
usage: hcrf.py mode datafile paramfile H [lamb]
mode: Set the script mode to train, tst, or label.
datafile: File containing input datapoints.
Format: lines consisting of
label 1 [feat1 feat2 ... ] 2
where label is a non-negative integer, 1 is the special start
of datapoint feature, 2 is the special end of datapoint feature,
and feat1, feat2 etc. are integers > 2 representing features
activated at the first, second etc. time steps.
paramfile: File to store/retrieve parameters.
H: Cardinality of hidden units. Must be >= 3.
    lamb: l2 regularization constant. Only applicable when mode is train.
"""
    if len(sys.argv) < 5:
        print(usage)
        sys.exit(1)
    datafile = sys.argv[2]
    paramfile = sys.argv[3]
    H = int(sys.argv[4])
if sys.argv[1] == "train":
lamb = float(sys.argv[5])
X, Y, maxf, maxw = load_data(datafile)
h = hcrf(H, maxw, maxf)
h.X = X
h.Y = Y
h.lamb = lamb
final_params = h.train_lmbfgs()
h.param[h.param_non_inf_indexes] = final_params[0]
h.save_params(paramfile)
if sys.argv[1] == "tst":
X, Y, maxf, maxw = load_data(datafile)
h = hcrf(H, maxw, maxf)
h.load_params(paramfile)
h.X = X
h.Y = Y
h.test()
if sys.argv[1] == "label":
X, Y, maxf, maxw = load_data(datafile)
h = hcrf(H, maxw, maxf)
h.load_params(paramfile)
h.X = X
h.label()