# NOTE(review): the line below is a whitespace-collapsed fragment. It begins
# inside the body of the scan step function `inner_fn`; the `def` line (and
# therefore the parameter names) is not visible from here.
#
# Step-function tail: h2_t and h3_t are each produced by fprop from the
# previous layer's output plus a recurrent state, and `output` combines
# h1_t, h2_t and h3_t. `params` is passed explicitly to every fprop call.
# NOTE(review): h3.fprop is given s2_tm1, the same state h2.fprop uses, while
# scan is seeded with a separate s3_0 -- this looks like it should be the third
# recurrent state; confirm against the inner_fn signature before changing.
#
# theano.scan runs inner_fn over `x` with s1_0, s2_0, s3_0 as initial states;
# the fourth outputs_info entry is None, i.e. no initial state is supplied for
# the output_t result.
#
# Cost: y_hat_temp and y are flattened to (ts*batch_size, -1) for NllBin, the
# result is reshaped to (ts, batch_size) and multiplied elementwise by `mask`.
#   nll  = masked cost summed over everything, divided by mask.sum()
#   cost = masked cost summed over axis 0 (the ts axis of the reshape), then
#          averaged over the remaining axis
# The model is then given its inputs, params and nodes, and an RMSProp
# optimizer is constructed with lr=0.0001, mom=0.95.
h2_t = h2.fprop([[h1_t], [s2_tm1]], params) h3_t = h3.fprop([[h2_t], [s2_tm1]], params) output_t = output.fprop([h1_t, h2_t, h3_t], params) return h1_t, h2_t, h3_t, output_t ((h1_temp, h2_temp, h3_temp, y_hat_temp), updates) =\ theano.scan(fn=inner_fn, sequences=[x], outputs_info=[s1_0, s2_0, s3_0, None]) ts, _, _ = y_hat_temp.shape y_hat_in = y_hat_temp.reshape((ts*batch_size, -1)) y_in = y.reshape((ts*batch_size, -1)) cost = NllBin(y_in, y_hat_in) cost_temp = cost.reshape((ts, batch_size)) cost = cost_temp * mask nll = cost.sum() / mask.sum() cost = cost.sum(axis=0).mean() cost.name = 'cost' nll.name = 'nll' model.inputs = [x, y, mask] model.params = params model.nodes = nodes optimizer = RMSProp( lr=0.0001, mom=0.95 )
# NOTE(review): the line below is a whitespace-collapsed fragment. It begins
# inside the body of the scan step function `inner_fn` (its `def` line is not
# visible from here) and ends inside an unterminated `extension = [...]` list
# that continues past this view.
#
# Step-function tail: h2_t and h3_t are each produced by fprop from the
# previous layer's output plus a recurrent state, and `output` combines
# h1_t, h2_t and h3_t. fprop is called with the input list only.
# NOTE(review): h3.fprop is given s2_tm1, the same state h2.fprop uses, while
# scan is seeded with a separate s3_0 -- this looks like it should be the third
# recurrent state; confirm against the inner_fn signature before changing.
#
# theano.scan runs inner_fn over `x` with s1_0, s2_0, s3_0 as initial states;
# the fourth outputs_info entry is None, i.e. no initial state is supplied for
# the output_t result.
#
# Cost: y_hat_temp and y are flattened to (ts * batch_size, -1) for NllBin, the
# result is reshaped to (ts, batch_size) and multiplied elementwise by `mask`.
#   nll  = masked cost summed over everything, divided by mask.sum()
#   cost = masked cost summed over axis 0 (the ts axis of the reshape), then
#          averaged over the remaining axis
# The parameters are stored on the private attribute model._params, an RMSProp
# optimizer is built with lr=0.0001, mom=0.95, and the extension list starts
# with GradientClipping(batch_size=batch_size) and EpochCount(100).
h2_t = h2.fprop([[h1_t], [s2_tm1]]) h3_t = h3.fprop([[h2_t], [s2_tm1]]) output_t = output.fprop([h1_t, h2_t, h3_t]) return h1_t, h2_t, h3_t, output_t ((h1_temp, h2_temp, h3_temp, y_hat_temp), updates) =\ theano.scan(fn=inner_fn, sequences=[x], outputs_info=[s1_0, s2_0, s3_0, None]) ts, _, _ = y_hat_temp.shape y_hat_in = y_hat_temp.reshape((ts * batch_size, -1)) y_in = y.reshape((ts * batch_size, -1)) cost = NllBin(y_in, y_hat_in) cost_temp = cost.reshape((ts, batch_size)) cost = cost_temp * mask nll = cost.sum() / mask.sum() cost = cost.sum(axis=0).mean() cost.name = 'cost' nll.name = 'nll' model.inputs = [x, y, mask] model._params = params model.nodes = nodes optimizer = RMSProp(lr=0.0001, mom=0.95) extension = [ GradientClipping(batch_size=batch_size), EpochCount(100),