#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
# Copyright © 2015 truong-d <truong-d@ahclab08>
#
# Distributed under terms of the MIT license.
"""
"""

from __future__ import print_function

import sys

import numpy as np

import chainer
import chainer.functions as F
import chainer.links as L
from chainer import Variable
from chainer import computational_graph as c
from chainer import cuda

import easy_dnn.data_util as data_util
import easy_dnn.misc as misc
from easy_dnn.nnet_model import NNET_Model

opts, args = misc.get_opts()
# Use CuPy on the GPU, NumPy on the CPU, through a common array-module alias.
xp = cuda.cupy if opts.gpu >= 0 else np
if opts.random_seed is not None:
    np.random.seed(opts.random_seed)


def setup_training(nnet_model, opts):
    """Wrap the network in a Classifier with the requested loss function and
    build an SGD/momentum-SGD optimizer with gradient clipping."""
    from chainer import optimizers
    if opts.loss_function == 'cross_entropy':
        model = L.Classifier(nnet_model)
        model.compute_accuracy = False
    elif opts.loss_function == 'mean_squared_error':
        model = L.Classifier(nnet_model, lossfun=F.mean_squared_error)
        model.compute_accuracy = False
    else:
        misc.error('Loss function %s is not supported!' % opts.loss_function)
    if opts.momentum is not None:
        optimizer = optimizers.MomentumSGD(lr=opts.lr, momentum=opts.momentum)
    else:
        optimizer = optimizers.SGD(lr=opts.lr)
    # The Classifier shares the predictor's parameters, so setting up the
    # optimizer on nnet_model updates the same weights as model.
    optimizer.setup(nnet_model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(opts.grad_clip))
    return optimizer, model


def print_stats(percentage, epoch, lr, loss):
    # Integer division keeps the bar width an int ('str * float' raises in Python 3).
    print('\r#Epoch {0}, lr: {1} [{2}] {3}% loss: {4}'.format(
        epoch, "%.6f" % lr, '#' * (percentage // 5),
        percentage, "%.3f" % loss), end='')


def evaluation(model, fname, show_progress=False):
    """Evaluate the model on a dataset on the CPU; return the mean loss,
    the predicted labels, and the reference targets."""
    model.predictor.forget_history()
    model.predictor.to_cpu()
    devset = data_util.load_data(fname)
    dev_data_resource = data_util.data_spliter(devset, batchsize=1, n_epoch=1)
    dev_loss = 0
    pred = []
    target = []
    for dev_idx, x_batch, y_batch, epoch, percentage, eos in dev_data_resource:
        if not dev_idx:
            break
        if show_progress:
            print_stats(percentage, 0, 0, dev_loss)
        x = Variable(np.asarray(x_batch))
        t = Variable(np.asarray(y_batch))
        loss_i = model(x, t)
        target.append(y_batch[0])
        pred.append(model.y.data[0].argmax())
        # Running mean of the per-sample loss.
        dev_loss = (dev_loss * (dev_idx - 1) + loss_i.data) / dev_idx
        if eos and opts.forget_on_new_utt:
            model.predictor.forget_history()
    model.predictor.forget_history()
    return dev_loss, pred, target


def train_nnet(model, optimizer, train_data_resource, opts):
    """Main training loop: accumulate the loss and update after every
    opts.bprop_len steps or at end-of-sequence (truncated BPTT); run a
    dev-set evaluation with learning-rate decay at the end of every epoch."""
    if opts.gpu >= 0:
        cuda.check_cuda_available()
        model.to_gpu(opts.gpu)
    accum_loss = 0
    i = 0
    train_loss = 0
    prev_dev_loss = 100000
    prev_percentage = 0
    dump_graph = True
    for train_idx, x_batch, y_batch, epoch, percentage, eos in train_data_resource:
        if train_idx is None:  # Done one epoch
            if opts.fname_dev:
                dev_loss, _, _ = evaluation(model, opts.fname_dev)
                if xp == cuda.cupy:
                    # evaluation() moved the model to the CPU; move it back.
                    model.to_gpu()
                print(' dev loss: %.3f' % dev_loss, end='')
                if optimizer.lr < opts.lr_stop:
                    break
                if prev_dev_loss - dev_loss < opts.start_decay:
                    optimizer.lr *= opts.lr_decay
                    print('\n...reducing lr to %.6f' % optimizer.lr)
                prev_dev_loss = dev_loss
            print('')
            continue
        x = Variable(xp.asarray(x_batch))
        t = Variable(xp.asarray(y_batch))
        loss_i = model(x, t)
        accum_loss += loss_i
        if dump_graph:
            # Dump the computational graph of the first loss for inspection.
            print('Dump graph')
            with open('graph.dot', 'w') as o:
                o.write(c.build_computational_graph((loss_i, )).dump())
            dump_graph = False
        if train_idx >= 1:
            # Running mean of the training loss.
            train_loss = (train_loss * (train_idx - 1) + loss_i.data) / train_idx
        if eos and opts.forget_on_new_utt:
            model.predictor.forget_history()
        if eos or (i + 1) % opts.bprop_len == 0:
            # Truncated BPTT: backprop through the accumulated loss, then
            # cut the history so gradients do not flow past this chunk.
            model.zerograds()
            accum_loss.backward()
            accum_loss.unchain_backward()
            accum_loss = 0
            optimizer.update()
            i = 0
        if percentage != prev_percentage:
            prev_percentage = percentage
            print_stats(percentage, epoch, optimizer.lr, train_loss)
            sys.stdout.flush()
        i += 1


def train_mode():
    nnet_model = None
    if opts.nnet_struct:
        nnet_model = NNET_Model.parse_structure(opts.nnet_struct)
    elif opts.fname_in_model:
        nnet_model = NNET_Model.load(opts.fname_in_model)
    trainset = data_util.load_data(opts.fname_train)
    eos_pad = misc.get_pading(opts.eos_pad)
    train_data_resource = data_util.data_spliter(trainset, batchsize=opts.batchsize,
                                                 n_epoch=opts.n_epoch, EOS=eos_pad)
    optimizer, model = setup_training(nnet_model, opts)
    train_nnet(model, optimizer, train_data_resource, opts)
    if opts.fname_test:
        print('====================TESTING=========================')
        test_loss, pred, target = evaluation(model, opts.fname_test, show_progress=True)
        if 'cross_entropy' in opts.loss_function:
            misc.f_measure(pred, target)
        print(' test loss: %.3f' % test_loss)
    if opts.fname_out_model:
        nnet_model.save(opts.fname_out_model)


def test_mode():
    loaded_model = NNET_Model.load(opts.fname_in_model)
    _, model = setup_training(loaded_model, opts)
    print('====================TESTING=========================')
    test_loss, pred, target = evaluation(model, opts.fname_test, show_progress=True)
    print(' test loss: %.3f' % test_loss)
    if 'cross_entropy' in opts.loss_function:
        misc.f_measure(pred, target)


def main():
    if opts.fname_train:
        train_mode()
    elif opts.fname_test:
        test_mode()


if __name__ == '__main__':
    main()