# Training loop: encode image frames with a pretrained autoencoder (`sae`),
# feed (direction + latent) vectors through an RNN step (`forward_one_step`),
# and train with truncated back-propagation through time (BPTT).
# NOTE(review): this chunk ends mid-loop — the expected `optimizer.update()`
# after gradient clipping and the `epoch` increment are outside this view;
# confirm they exist in the surrounding file.
print('[train]')
print('going to train {} iterations'.format(jump * n_epoch))

# loop starts: one pass per epoch over a freshly generated sequence
while epoch <= n_epoch:
    # initialize hidden state to 0 at the start of each epoch
    state = make_initial_state()
    # train dataset: regenerate a sequence of length `whole_len` each epoch
    # (presumably `dg` is a data generator yielding 'image'/'direction' keys —
    # verify against its definition)
    train_data = dg.generate_seq(whole_len)
    for i in six.moves.range(jump):
        # forward propagation:
        # encode current frame to its latent vector; [0] drops the batch axis
        h_batch = sae.encode(chainer.Variable(mod.asarray([train_data['image'][i]], dtype='float32'))).data[0]
        # input = direction command concatenated with the current latent code
        # (assumes train_data['direction'][i] is a plain Python list — TODO confirm)
        tmp = train_data['direction'][i] + h_batch.tolist()
        x_batch = mod.asarray([tmp], dtype='float32')
        # target = latent code of the NEXT frame (next-frame prediction in latent space);
        # note `jump` must therefore be <= len(train_data['image']) - 1
        t_batch = sae.encode(chainer.Variable(mod.asarray([train_data['image'][i + 1]], dtype = 'float32'))).data
        # one RNN step: returns updated hidden state, loss, and accuracy
        state, loss_i, acc_i = forward_one_step(x_batch, t_batch, state)
        accum_loss += loss_i            # accumulate graph-connected loss for BPTT
        cur_log_perp += loss_i.data.reshape(())  # scalar running loss for perplexity logging
        # truncated BPTT: backprop and cut the graph every `bprop_len` steps
        if (i + 1) % bprop_len == 0:
            optimizer.zero_grads()
            accum_loss.backward()
            accum_loss.unchain_backward()  # truncate: detach history so the graph doesn't grow unboundedly
            # reset the accumulator to a fresh zero Variable for the next window
            accum_loss = chainer.Variable(mod.zeros((), dtype=np.float32))
            optimizer.clip_grads(grad_clip)  # gradient clip (L2-norm clipping in chainer v1)
            # NOTE(review): `optimizer.update()` is not visible in this chunk —
            # it presumably follows immediately after; confirm.