h3_t = h3.fprop([[x_t, h2_t], [h3_tm1, h1_tm1, h2_tm1]]) return h1_t, h2_t, h3_t ((h1_temp, h2_temp, h3_temp), updates) = theano.scan(fn=inner_fn, sequences=[x], outputs_info=[s1_0, s2_0, s3_0]) ts, _, _ = y.shape post_scan_shape = ((ts*batch_size, -1)) h1_in = h1_temp.reshape(post_scan_shape) h2_in = h2_temp.reshape(post_scan_shape) h3_in = h3_temp.reshape(post_scan_shape) y_hat_in = output.fprop([h1_in, h2_in, h3_in]) cost = NllMulInd(y.flatten(), y_hat_in) cost = cost.mean() cost.name = 'cost' model.inputs = [x, y] model._params = params model.nodes = nodes model.set_updates(update_list) optimizer = Adam( lr=0.001 ) extension = [ GradientClipping(batch_size=batch_size), EpochCount(100),
# You will fill in a list of nodes nodes = [h1, output] # Initalize the nodes for node in nodes: node.initialize() params = flatten([node.get_params().values() for node in nodes]) # Build the Theano computational graph h1_out = h1.fprop([x]) y_hat = output.fprop([h1_out]) # Compute the cost cost = NllMulInd(y, y_hat).mean() err = error(predict(y_hat), y) cost.name = 'cross_entropy' err.name = 'error_rate' model.inputs = [x, y] model._params = params model.nodes = nodes # Define your optimizer: Momentum (Nesterov), RMSProp, Adam optimizer = RMSProp(lr=0.001) extension = [ GradientClipping(), EpochCount(40), Monitoring(freq=100,
return h1_t, h2_t, h3_t ((h1_temp, h2_temp, h3_temp), updates) = theano.scan(fn=inner_fn, sequences=[x], outputs_info=[s1_0, s2_0, s3_0]) ts, _, _ = y.shape post_scan_shape = ((ts * batch_size, -1)) h1_in = h1_temp.reshape(post_scan_shape) h2_in = h2_temp.reshape(post_scan_shape) h3_in = h3_temp.reshape(post_scan_shape) y_hat_in = output.fprop([h1_in, h2_in, h3_in]) cost = NllMulInd(y.flatten(), y_hat_in) cost = cost.mean() cost.name = 'cost' model.inputs = [x, y] model._params = params model.nodes = nodes model.set_updates(update_list) optimizer = Adam(lr=0.001) extension = [ GradientClipping(batch_size=batch_size), EpochCount(100), Monitoring(freq=100, ddout=[cost]), Picklize(freq=100, path=save_path)
# Initialize the nodes
for node in nodes:
    node.initialize()

# Collect parameters
params = flatten([node.get_params().values() for node in nodes])

# Build the Theano computational graph.
# Training path interleaves dropout nodes (d1, d2) between the layers.
h1_out = h1.fprop([x])
d1_out = d1.fprop([h1_out])
h2_out = h2.fprop([d1_out])
d2_out = d2.fprop([h2_out])
y_hat = output.fprop([d2_out])

# Compute the cost (mean cross-entropy) and the misclassification rate.
cost = NllMulInd(y, y_hat).mean()
err = error(predict(y_hat), y)
cost.name = 'cross_entropy'
err.name = 'error_rate'

# Switch the dropout nodes to mode 1 before building the monitoring graph.
d1.set_mode(1)
d2.set_mode(1)
# NOTE(review): the monitoring path below connects h1 -> h2 -> output directly
# and never calls d1.fprop/d2.fprop — confirm that set_mode(1) makes skipping
# the dropout nodes the intended inference behaviour in this library.
mn_h1_out = h1.fprop([mn_x])
mn_h2_out = h2.fprop([mn_h1_out])
mn_y_hat = output.fprop([mn_h2_out])

# Same metrics on the monitoring inputs (mn_x, mn_y).
mn_cost = NllMulInd(mn_y, mn_y_hat).mean()
mn_err = error(predict(mn_y_hat), mn_y)
mn_cost.name = 'cross_entropy'
mn_err.name = 'error_rate'