def create_model(ids, vocab2id, size):
    """Build the symbolic LSTM word-prediction model for a sequence of ids.

    ``size`` is used both as the word-vector width and as the LSTM
    hidden-state width.

    Args:
        ids: Sequence of word ids (presumably a Theano integer vector --
            confirm against the caller). It indexes the embedding matrix and,
            shifted by one position, supplies the prediction targets.
        vocab2id: Vocabulary mapping, forwarded to ``create_vocab_vectors``.
        size: Width shared by the word vectors and the hidden state.

    Returns:
        Tuple ``(scores, cost, obv_cost, P)`` where ``scores`` is the softmax
        output for every step, ``cost`` and ``obv_cost`` are the first and
        second values returned by ``word_cost``, and ``P`` is the
        ``Parameters`` container the model's parameters were registered on.
    """
    embedding_width = size
    recurrent_width = size

    P = Parameters()
    P.V = create_vocab_vectors(P, vocab2id, embedding_width)

    # The output projection starts as zeros with the transposed shape of the
    # embedding matrix; the bias has one entry per embedding row.
    embedding_shape = P.V.get_value().shape
    P.W_predict = np.zeros(embedding_shape).T
    P.b_predict = np.zeros((embedding_shape[0],))

    embedded = P.V[ids]
    lstm_step = build_lstm_step(P, embedding_width, recurrent_width)

    # Only the first scan output (the slot seeded with P.init_h) is used
    # downstream; the second output and the scan updates are discarded.
    (hidden_seq, _), _ = theano.scan(
        lstm_step,
        sequences=[embedded],
        outputs_info=[P.init_h, P.init_c],
    )

    logits = T.dot(hidden_seq, P.W_predict) + P.b_predict
    scores = T.nnet.softmax(logits)

    # The output at position t is scored against the id at position t + 1,
    # so the last output and the first id are dropped.
    log_likelihood, cross_ent = word_cost(scores[:-1], ids[1:])

    # NOTE: an L1 penalty (1e-4 * sum of |w| over P.values()) used to be added
    # to the cost here and is currently disabled.
    return scores, log_likelihood, cross_ent, P
def create_model(ids, vocab2id, size):
    """Build the symbolic LSTM word-prediction model for a sequence of ids.

    ``size`` is used both as the word-vector width and as the LSTM
    hidden-state width.

    Args:
        ids: Sequence of word ids (presumably a Theano integer vector --
            confirm against the caller). It indexes the embedding matrix and,
            shifted by one position, supplies the prediction targets.
        vocab2id: Vocabulary mapping, forwarded to ``create_vocab_vectors``.
        size: Width shared by the word vectors and the hidden state.

    Returns:
        Tuple ``(scores, cost, obv_cost, P)`` where ``scores`` is the softmax
        output for every step, ``cost`` and ``obv_cost`` are the first and
        second values returned by ``word_cost``, and ``P`` is the
        ``Parameters`` container the model's parameters were registered on.
    """
    embedding_width = size
    recurrent_width = size

    P = Parameters()
    P.V = create_vocab_vectors(P, vocab2id, embedding_width)

    # The output projection starts as zeros with the transposed shape of the
    # embedding matrix; the bias has one entry per embedding row.
    embedding_shape = P.V.get_value().shape
    P.W_predict = np.zeros(embedding_shape).T
    P.b_predict = np.zeros((embedding_shape[0],))

    embedded = P.V[ids]
    lstm_step = build_lstm_step(P, embedding_width, recurrent_width)

    # Only the first scan output (the slot seeded with P.init_h) is used
    # downstream; the second output and the scan updates are discarded.
    (hidden_seq, _), _ = theano.scan(
        lstm_step,
        sequences=[embedded],
        outputs_info=[P.init_h, P.init_c],
    )

    logits = T.dot(hidden_seq, P.W_predict) + P.b_predict
    scores = T.nnet.softmax(logits)

    # The output at position t is scored against the id at position t + 1,
    # so the last output and the first id are dropped.
    log_likelihood, cross_ent = word_cost(scores[:-1], ids[1:])

    # NOTE: an L1 penalty (1e-4 * sum of |w| over P.values()) used to be added
    # to the cost here and is currently disabled.
    return scores, log_likelihood, cross_ent, P
cell = forget_gate * prev_cell + in_gate * cell_updates out_lin = x_o + h_o + b_o + T.dot(cell, V_o) out_gate = T.nnet.sigmoid(out_lin) hid = out_gate * T.tanh(cell) return cell, hid return step if __name__ == "__main__": P = Parameters() X = T.ivector("X") P.V = np.zeros((8, 8), dtype=np.int32) X_rep = P.V[X] P.W_output = np.zeros((15, 8), dtype=np.int32) lstm_layer = build(P, name="test", input_size=8, hidden_size=15) _, hidden = lstm_layer(X_rep) output = T.nnet.softmax(T.dot(hidden, P.W_output)) delay = 5 label = X[:-delay] predicted = output[delay:] cost = -T.sum(T.log(predicted[T.arange(predicted.shape[0]), label])) params = P.values() gradients = T.grad(cost, wrt=params)
cell = forget_gate * prev_cell + in_gate * cell_updates out_lin = x_o + h_o + b_o + T.dot(cell, V_o) out_gate = T.nnet.sigmoid(out_lin) hid = out_gate * T.tanh(cell) return cell, hid return step if __name__ == "__main__": P = Parameters() X = T.ivector('X') P.V = np.zeros((8, 8), dtype=np.int32) X_rep = P.V[X] P.W_output = np.zeros((15, 8), dtype=np.int32) lstm_layer = build(P, name="test", input_size=8, hidden_size=15) _, hidden = lstm_layer(X_rep) output = T.nnet.softmax(T.dot(hidden, P.W_output)) delay = 5 label = X[:-delay] predicted = output[delay:] cost = -T.sum(T.log(predicted[T.arange(predicted.shape[0]), label])) params = P.values() gradients = T.grad(cost, wrt=params)