    h1_t = h1.fprop([[x_t], [s1_tm1]], params)
    h2_t = h2.fprop([[h1_t], [s2_tm1]], params)
    h3_t = h3.fprop([[h2_t], [s3_tm1]], params)
    output_t = output.fprop([h1_t, h2_t, h3_t], params)
    return h1_t, h2_t, h3_t, output_t

((h1_temp, h2_temp, h3_temp, y_hat_temp), updates) = \
    theano.scan(fn=inner_fn,
                sequences=[x],
                outputs_info=[s1_0, s2_0, s3_0, None])

ts, _, _ = y_hat_temp.shape
y_hat_in = y_hat_temp.reshape((ts * batch_size, -1))
y_in = y.reshape((ts * batch_size, -1))
cost = NllBin(y_in, y_hat_in)
cost_temp = cost.reshape((ts, batch_size))
cost = cost_temp * mask          # zero out costs at padded timesteps
nll = cost.sum() / mask.sum()    # average over valid timesteps only
cost = cost.sum(axis=0).mean()
cost.name = 'cost'
nll.name = 'nll'

model.inputs = [x, y, mask]
model.params = params
model.nodes = nodes

optimizer = RMSProp(
    lr=0.0001,
    mom=0.95
)
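For reference, the mask arithmetic at the end of this listing does the following; a minimal numpy sketch, where the per-step cost values are arbitrary stand-ins for NllBin's output:

import numpy as np

ts, batch_size = 5, 4
cost = np.random.uniform(size=(ts, batch_size))   # stand-in for reshaped NllBin output
mask = np.ones((ts, batch_size))
mask[3:, 1] = 0.                    # second sequence is only 3 steps long

cost = cost * mask                  # padded steps contribute nothing
nll = cost.sum() / mask.sum()       # mean over valid timesteps only
cost = cost.sum(axis=0).mean()      # sum over time, mean over sequences

Normalizing by mask.sum() rather than ts * batch_size keeps the reported nll comparable across minibatches with different amounts of padding.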
def fprop(self, X):
    # X[0]: targets, X[1]: predictions (cf. NllBin(y_in, y_hat_in) above)
    cost = NllBin(X[0], X[1])
    if self.use_sum:
        return cost.sum()
    else:
        return cost.mean()
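What use_sum toggles, in a self-contained numpy sketch (bce below is an illustrative elementwise binary cross-entropy, not the library's NllBin):

import numpy as np

def bce(y, p, eps=1e-7):
    # elementwise -[y log(p) + (1 - y) log(1 - p)], clipped for stability
    p = np.clip(p, eps, 1 - eps)
    return -(y * np.log(p) + (1 - y) * np.log(1 - p))

y = np.array([[1., 0.], [0., 1.]])
p = np.array([[0.9, 0.2], [0.3, 0.7]])
total = bce(y, p).sum()    # use_sum=True: grows with batch size
mean = bce(y, p).mean()    # use_sum=False: invariant to batch size

Since the summed cost scales with batch size, the learning rate typically needs rescaling when switching between the two reductions.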
    canvas_out = canvas.fprop([[write_out], [canvas_tm1]])
    return enc_out, dec_out, canvas_out, kl_out

((enc_out, dec_out, canvas_out, kl_out), updates) = \
    theano.scan(fn=inner_fn,
                outputs_info=[enc.get_init_state(),
                              dec.get_init_state(),
                              canvas.get_init_state(),
                              None],
                non_sequences=[x],
                n_steps=n_steps)

# Fold scan's internal updates (e.g. random streams) into the graph so that
# compiled functions apply them automatically.
for k, v in updates.iteritems():
    k.default_update = v

recon_term = NllBin(x, T.nnet.sigmoid(canvas_out[-1])).mean()
kl_term = kl_out.sum(axis=0).mean()
cost = recon_term + kl_term
cost.name = 'cost'
recon_term.name = 'recon_term'
kl_term.name = 'kl_term'

recon_err = ((x - T.nnet.sigmoid(canvas_out[-1]))**2).mean() / x.std()
recon_err.name = 'recon_err'

model.inputs = [x]
model._params = params
model.nodes = nodes

optimizer = Adam(
    lr=0.001
)

extension = [
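A numpy sketch of how this cost decomposes, assuming canvas_out has shape (n_steps, batch, n_vis), kl_out has shape (n_steps, batch), and NllBin reduces each example by summing over features:

import numpy as np

def sigmoid(z):
    return 1. / (1. + np.exp(-z))

n_steps, batch, n_vis = 8, 4, 10
x = np.random.randint(0, 2, (batch, n_vis)).astype('float64')
canvas_out = np.random.randn(n_steps, batch, n_vis)   # stand-in canvases
kl_out = np.abs(np.random.randn(n_steps, batch))      # stand-in per-step KL

# reconstruction: binary NLL of the final canvas pushed through a sigmoid
p = np.clip(sigmoid(canvas_out[-1]), 1e-7, 1 - 1e-7)
recon_term = -(x * np.log(p) + (1 - x) * np.log(1 - p)).sum(axis=-1).mean()
kl_term = kl_out.sum(axis=0).mean()   # KL accumulated over steps, batch mean
cost = recon_term + kl_term

Only the final canvas enters the reconstruction term; the intermediate canvases matter solely through the KL penalties accumulated at each step.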
    h1_t = h1.fprop([[x_t], [s1_tm1]])
    h2_t = h2.fprop([[h1_t], [s2_tm1]])
    h3_t = h3.fprop([[h2_t], [s3_tm1]])
    output_t = output.fprop([h1_t, h2_t, h3_t])
    return h1_t, h2_t, h3_t, output_t

((h1_temp, h2_temp, h3_temp, y_hat_temp), updates) = \
    theano.scan(fn=inner_fn,
                sequences=[x],
                outputs_info=[s1_0, s2_0, s3_0, None])

ts, _, _ = y_hat_temp.shape
y_hat_in = y_hat_temp.reshape((ts * batch_size, -1))
y_in = y.reshape((ts * batch_size, -1))
cost = NllBin(y_in, y_hat_in)
cost_temp = cost.reshape((ts, batch_size))
cost = cost_temp * mask
nll = cost.sum() / mask.sum()
cost = cost.sum(axis=0).mean()
cost.name = 'cost'
nll.name = 'nll'

model.inputs = [x, y, mask]
model._params = params
model.nodes = nodes

optimizer = RMSProp(lr=0.0001, mom=0.95)

extension = [
    GradientClipping(batch_size=batch_size),
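The stepping behaviour of theano.scan used above, sketched in plain Python (an illustration of the control flow, not Theano's implementation): the first three outputs are fed back as the next step's states, while the slot initialised with None is only collected.

def scan_like(fn, sequence, init_states):
    states = list(init_states)
    collected = []
    for x_t in sequence:
        result = fn(x_t, *states)
        states = list(result[:len(init_states)])  # fed back at the next step
        collected.append(result)                  # all outputs stacked over time
    return collected

# e.g. a toy step function with three recurrent states and one free output:
step = lambda x_t, s1, s2, s3: (s1 + x_t, s2 + 2 * x_t, s3 + 3 * x_t, s1 * x_t)
outputs = scan_like(step, [1.0, 2.0, 3.0], [0.0, 0.0, 0.0])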
                          init_U=init_U,
                          init_b=init_b)
h4 = FullyConnectedLayer(name='h4',
                         parent=['h1', 'h2', 'h3'],
                         nout=nlabel,
                         unit='sigmoid',
                         init_W=init_W,
                         init_b=init_b)
nodes = [h1, h2, h3, h4]
rnn = Net(inputs=inputs, inputs_dim=inputs_dim, nodes=nodes)
y_hat = rnn.build_recurrent_graph(output_args=[h4])[0]

masked_y = y[mask.nonzero()]
masked_y_hat = y_hat[mask.nonzero()]
cost = NllBin(masked_y, masked_y_hat).sum()
nll = NllBin(masked_y, masked_y_hat).mean()
cost.name = 'cost'
nll.name = 'nll'

model.graphs = [rnn]

optimizer = RMSProp(
    lr=0.0001,
    mom=0.95
)

extension = [
    GradientClipping(batch_size=batch_size),
    EpochCount(100),
    Monitoring(freq=10,
               ddout=[cost, nll],
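How the mask.nonzero() indexing selects the unpadded entries, in a small numpy example (shapes are illustrative): instead of multiplying costs by the mask, this variant gathers only the valid (timestep, batch) positions before computing the cost.

import numpy as np

ts, batch_size, nlabel = 4, 3, 2
y = np.random.randint(0, 2, (ts, batch_size, nlabel))
y_hat = np.random.uniform(size=(ts, batch_size, nlabel))
mask = np.array([[1, 1, 1],
                 [1, 1, 0],
                 [1, 0, 0],
                 [1, 0, 0]])

masked_y = y[mask.nonzero()]          # shape: (n_valid, nlabel)
masked_y_hat = y_hat[mask.nonzero()]
assert masked_y.shape[0] == int(mask.sum())

Because padded positions are dropped before the reduction, .mean() here already averages over valid entries only, with no explicit division by mask.sum().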