def build_model_residual(args, dtype=floatX):
    """Assemble the residual-RNN graph and return its costs and state updates.

    Args:
        args: Configuration object. This function reads ``args.layers``,
            ``args.skip_connections`` and ``args.skip_output`` and passes
            ``args`` on to the ``get_*`` helpers.
        dtype: Not used inside this function.

    Returns:
        A tuple ``(cost, unregularized_cost, updates, hidden_states)``.
        ``updates`` is a list of ``(inits[0][d], last_state_of_layer_d)``
        pairs and ``hidden_states`` is the list of per-layer state
        variables, named ``hidden_state_<d>``.
    """
    logger.info('Building model ...')

    # Per the original notes: one 3D tensor per layer,
    # laid out as (Time X Batch X embedding_dim).
    pre_rnn, x_mask = get_prernn(args)
    rnn = get_rnn(args)

    # Inputs and initial states for the recurrent stack.
    kwargs, inits = get_rnn_kwargs(pre_rnn, args)

    # Run the recurrent stack over the inputs. The result is expected to be
    #   [state, state_1, state_2 ...]  when args.layers > 1
    #   state                          when args.layers == 1
    h = rnn.apply(low_memory=True, mask=x_mask, **kwargs)

    final_step = {}
    hidden_states = []

    if args.layers <= 1:
        # Single layer: `h` is itself the state sequence.
        # TODO correct bug
        # hidden_states.append(h * x_mask)
        hidden_states.append(h)
        h.name = "hidden_state_0"
        # Note: with a mask, re-seeding the initial state from the last
        # state does not make sense anymore.
        final_step[0] = h[-1, :, :]
    else:
        # Several layers: keep the final time step of every layer.
        for layer in range(args.layers):
            layer_state = h[layer]
            # TODO correct bug
            # h[d] = h[d] * x_mask
            final_step[layer] = layer_state[-1, :, :]
            layer_state.name = "hidden_state_" + str(layer)
            hidden_states.append(layer_state)

        # Without skip connections only the top layer feeds the output;
        # with them, every layer's state is concatenated on axis 2.
        if not (args.skip_connections or args.skip_output):
            h = h[-1]
        else:
            h = tensor.concatenate(h, axis=2)

    # Carry each layer's last state over into its initial state.
    updates = [(inits[0][layer], final_step[layer])
               for layer in range(args.layers)]

    presoft = get_presoft(h, args)
    cost, unregularized_cost = get_costs(presoft, args)

    return cost, unregularized_cost, updates, hidden_states
def build_model_lstm(args, dtype=floatX):
    """Build the LSTM graph and return its costs, updates and gate values.

    Args:
        args: Configuration object. This function reads ``args.layers``,
            ``args.skip_connections`` and ``args.skip_output`` and passes
            ``args`` on to the ``get_*`` helpers.
        dtype: Not used inside this function.

    Returns:
        A tuple ``(cost, unregularized_cost, updates, gate_values,
        hidden_states)``:

        * ``updates`` -- list of ``(initial_value, last_value)`` pairs; for
          each layer ``d`` it holds ``(inits[0][d], last state)`` followed by
          ``(inits[1][d], last cell)``.
        * ``gate_values`` -- dict with keys ``"in_gates"``,
          ``"forget_gates"`` and ``"out_gates"``, each the per-layer slice
          of the RNN outputs.
        * ``hidden_states`` -- flat list ``[state_0, cell_0, state_1,
          cell_1, ...]`` of the named state and cell variables.
    """
    logger.info('Building model ...')

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn, x_mask = get_prernn(args)
    rnn = get_rnn(args)

    # Prepare inputs and initial states for the RNN
    kwargs, inits = get_rnn_kwargs(pre_rnn, args)

    # Apply the RNN to the inputs
    h = rnn.apply(mask=x_mask, **kwargs)

    # h = [state, cell, in, forget, out, state_1,
    #      cell_1, in_1, forget_1, out_1 ...]
    # i.e. five consecutive entries per layer.
    last_states = {}
    last_cells = {}
    hidden_states = []
    for d in range(args.layers):
        state = h[5 * d]
        cell = h[5 * d + 1]
        # TODO correct bug
        # h[5 * d] = h[5 * d] * x_mask
        # h[5 * d + 1] = h[5 * d + 1] * x_mask
        last_states[d] = state[-1, :, :]
        last_cells[d] = cell[-1, :, :]
        state.name = "hidden_state_" + str(d)
        cell.name = "hidden_cell_" + str(d)
        hidden_states.extend([state, cell])

    # The updates of the hidden states
    # Note: if we have mask, then updating initial state
    # with last state does not make sense anymore.
    updates = []
    for d in range(args.layers):
        updates.append((inits[0][d], last_states[d]))
        # The initial cell must be refreshed from the last *cell*, not the
        # last hidden state; otherwise the carried-over memory is wrong.
        updates.append((inits[1][d], last_cells[d]))

    # Extract the gate values of every layer (stride 5 over the flat list).
    gate_values = {"in_gates": h[2::5],
                   "forget_gates": h[3::5],
                   "out_gates": h[4::5]}

    h = h[::5]

    # Now we have correctly:
    # h = [state, state_1, state_2 ...] if args.layers > 1
    # h = [state] if args.layers == 1

    # If we have skip connections, concatenate all the states
    # Else only consider the state of the highest layer
    if args.layers > 1:
        if args.skip_connections or args.skip_output:
            h = tensor.concatenate(h, axis=2)
        else:
            h = h[-1]
    else:
        h = h[0]
    # NOTE(review): when no concatenation happens, `h` appears to be the same
    # variable that was named "hidden_state_<d>" above, so this renames it
    # inside `hidden_states` as well -- confirm this is intended.
    h.name = "hidden_state_all"

    presoft = get_presoft(h, args)
    cost, unregularized_cost = get_costs(presoft, args)

    return cost, unregularized_cost, updates, gate_values, hidden_states
def build_model_lstm(args, dtype=floatX):
    """Build the LSTM graph and return its costs, updates and gate values.

    Args:
        args: Configuration object. This function reads ``args.layers``,
            ``args.skip_connections`` and ``args.skip_output`` and passes
            ``args`` on to the ``get_*`` helpers.
        dtype: Not used inside this function.

    Returns:
        A tuple ``(cost, unregularized_cost, updates, gate_values,
        hidden_states)``:

        * ``updates`` -- list of ``(initial_value, last_value)`` pairs; for
          each layer ``d`` it holds ``(inits[0][d], last state)`` followed by
          ``(inits[1][d], last cell)``.
        * ``gate_values`` -- dict with keys ``"in_gates"``,
          ``"forget_gates"`` and ``"out_gates"``, each the per-layer slice
          of the RNN outputs.
        * ``hidden_states`` -- flat list ``[state_0, cell_0, state_1,
          cell_1, ...]`` of the named state and cell variables.
    """
    logger.info('Building model ...')

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn, x_mask = get_prernn(args)
    rnn = get_rnn(args)

    # Prepare inputs and initial states for the RNN
    kwargs, inits = get_rnn_kwargs(pre_rnn, args)

    # Apply the RNN to the inputs
    h = rnn.apply(mask=x_mask, **kwargs)

    # h = [state, cell, in, forget, out, state_1,
    #      cell_1, in_1, forget_1, out_1 ...]
    # i.e. five consecutive entries per layer.
    last_states = {}
    last_cells = {}
    hidden_states = []
    for d in range(args.layers):
        state = h[5 * d]
        cell = h[5 * d + 1]
        # TODO correct bug
        # h[5 * d] = h[5 * d] * x_mask
        # h[5 * d + 1] = h[5 * d + 1] * x_mask
        last_states[d] = state[-1, :, :]
        last_cells[d] = cell[-1, :, :]
        state.name = "hidden_state_" + str(d)
        cell.name = "hidden_cell_" + str(d)
        hidden_states.extend([state, cell])

    # The updates of the hidden states
    # Note: if we have mask, then updating initial state
    # with last state does not make sense anymore.
    updates = []
    for d in range(args.layers):
        updates.append((inits[0][d], last_states[d]))
        # The initial cell must be refreshed from the last *cell*, not the
        # last hidden state; otherwise the carried-over memory is wrong.
        updates.append((inits[1][d], last_cells[d]))

    # Extract the gate values of every layer (stride 5 over the flat list).
    gate_values = {
        "in_gates": h[2::5],
        "forget_gates": h[3::5],
        "out_gates": h[4::5]
    }

    h = h[::5]

    # Now we have correctly:
    # h = [state, state_1, state_2 ...] if args.layers > 1
    # h = [state] if args.layers == 1

    # If we have skip connections, concatenate all the states
    # Else only consider the state of the highest layer
    if args.layers > 1:
        if args.skip_connections or args.skip_output:
            h = tensor.concatenate(h, axis=2)
        else:
            h = h[-1]
    else:
        h = h[0]
    # NOTE(review): when no concatenation happens, `h` appears to be the same
    # variable that was named "hidden_state_<d>" above, so this renames it
    # inside `hidden_states` as well -- confirm this is intended.
    h.name = "hidden_state_all"

    presoft = get_presoft(h, args)
    cost, unregularized_cost = get_costs(presoft, args)

    return cost, unregularized_cost, updates, gate_values, hidden_states