def create_model(ids, vocab2id, size):
    """Assemble the symbolic graph of a recurrent next-word predictor.

    ``size`` is used both as the word-vector width and as the recurrent
    state width handed to ``build_lstm_step``.

    Args:
        ids: symbolic sequence of word indices; it selects rows of ``P.V``
            and, shifted by one position, provides the prediction targets.
        vocab2id: vocabulary mapping, forwarded to ``create_vocab_vectors``.
        size: width of the word vectors and of the recurrent state.

    Returns:
        A tuple ``(scores, cost, obv_cost, P)``: the softmax output for
        every position, the first and second value returned by
        ``word_cost`` (in that order), and the ``Parameters`` container.
    """
    embedding_dim = size
    recurrent_dim = size

    P = Parameters()
    P.V = create_vocab_vectors(P, vocab2id, embedding_dim)

    # The output projection mirrors the embedding table: the weight has the
    # transposed shape of P.V and the bias has one entry per row of P.V.
    embedding_shape = P.V.get_value().shape
    P.W_predict = np.zeros(embedding_shape).T
    P.b_predict = np.zeros((embedding_shape[0],))

    word_vectors = P.V[ids]
    step = build_lstm_step(P, embedding_dim, recurrent_dim)

    # scan yields (outputs, updates); only the first of the two recurrent
    # outputs (the one seeded with P.init_h) is used below.
    (states, _), _ = theano.scan(
        step,
        sequences=[word_vectors],
        outputs_info=[P.init_h, P.init_c],
    )

    scores = T.nnet.softmax(T.dot(states, P.W_predict) + P.b_predict)

    # The scores at position t are compared with the id at position t + 1.
    log_likelihood, cross_ent = word_cost(scores[:-1], ids[1:])

    # NOTE: an L1 penalty over P.values() (weight 1e-4) used to be added to
    # the training cost; it is currently disabled.
    return scores, log_likelihood, cross_ent, P
Beispiel #2
0
def create_model(ids, vocab2id, size):
    """Build the symbolic next-word prediction graph and its parameters.

    Args:
        ids: symbolic sequence of word indices used to look up rows of
            ``P.V``; ``ids[1:]`` serves as the prediction target.
        vocab2id: vocabulary mapping passed on to ``create_vocab_vectors``.
        size: shared width of the word vectors and the recurrent state.

    Returns:
        ``(scores, cost, obv_cost, P)`` where ``scores`` is the softmax
        output for every position, ``cost`` and ``obv_cost`` are the first
        and second value returned by ``word_cost``, and ``P`` is the
        ``Parameters`` container holding everything created here.
    """
    P = Parameters()
    P.V = create_vocab_vectors(P, vocab2id, size)

    # Prediction layer: zero-initialised, with the weight shaped as the
    # transpose of P.V and one bias entry per row of P.V.
    table_shape = P.V.get_value().shape
    P.W_predict = np.zeros(table_shape).T
    P.b_predict = np.zeros((table_shape[0],))

    inputs = P.V[ids]

    # Word-vector size and recurrent state size are both ``size``.
    step = build_lstm_step(P, size, size)

    scan_outputs, _ = theano.scan(
        step,
        sequences=[inputs],
        outputs_info=[P.init_h, P.init_c],
    )
    # Two recurrent outputs are produced; only the first one is projected.
    states, _ = scan_outputs

    projected = T.dot(states, P.W_predict) + P.b_predict
    scores = T.nnet.softmax(projected)

    # Drop the last score row and the first id so that each position is
    # scored against the id that follows it.
    cost, obv_cost = word_cost(scores[:-1], ids[1:])

    # NOTE: the L1 term (1e-4 * sum of |w| over P.values()) is not applied.
    return scores, cost, obv_cost, P
Beispiel #3
0
        cell = forget_gate * prev_cell + in_gate * cell_updates

        out_lin = x_o + h_o + b_o + T.dot(cell, V_o)
        out_gate = T.nnet.sigmoid(out_lin)

        hid = out_gate * T.tanh(cell)
        return cell, hid

    return step


if __name__ == "__main__":
    # Smoke test: wire an embedding lookup, the recurrent layer and a softmax
    # read-out together, then differentiate a delayed-recall cost with
    # respect to everything returned by P.values().
    n_symbols = 8  # rows of P.V and columns of P.W_output
    input_size = 8
    hidden_size = 15
    delay = 5

    P = Parameters()
    X = T.ivector("X")

    P.V = np.zeros((n_symbols, input_size), dtype=np.int32)
    X_rep = P.V[X]
    P.W_output = np.zeros((hidden_size, n_symbols), dtype=np.int32)
    lstm_layer = build(
        P,
        name="test",
        input_size=input_size,
        hidden_size=hidden_size,
    )

    # Only the second value returned by the layer is read out.
    _, hidden = lstm_layer(X_rep)
    logits = T.dot(hidden, P.W_output)
    output = T.nnet.softmax(logits)

    # The output at step t + delay is scored against the input at step t.
    label = X[:-delay]
    predicted = output[delay:]

    row_index = T.arange(predicted.shape[0])
    label_prob = predicted[row_index, label]
    cost = -T.sum(T.log(label_prob))

    params = P.values()
    gradients = T.grad(cost, wrt=params)
Beispiel #4
0
        cell = forget_gate * prev_cell + in_gate * cell_updates

        out_lin = x_o + h_o + b_o + T.dot(cell, V_o)
        out_gate = T.nnet.sigmoid(out_lin)

        hid = out_gate * T.tanh(cell)
        return cell, hid

    return step


if __name__ == "__main__":
    # Quick self-check of the layer: build a tiny graph around it and ask
    # Theano for the gradient of a delayed-recall cost with respect to
    # everything returned by P.values().
    symbol_count = 8  # number of rows in P.V / columns in P.W_output
    embed_width = 8
    state_width = 15

    P = Parameters()
    X = T.ivector("X")
    P.V = np.zeros((symbol_count, embed_width), dtype=np.int32)

    X_rep = P.V[X]
    P.W_output = np.zeros((state_width, symbol_count), dtype=np.int32)
    lstm_layer = build(
        P, name="test", input_size=embed_width, hidden_size=state_width
    )

    # The first value returned by the layer is discarded here.
    _, hidden = lstm_layer(X_rep)
    output = T.nnet.softmax(T.dot(hidden, P.W_output))

    # Pair each input symbol with the prediction made ``delay`` steps later.
    delay = 5
    label = X[:-delay]
    predicted = output[delay:]

    # Negative log of the probability assigned to each expected symbol.
    positions = T.arange(predicted.shape[0])
    log_probs = T.log(predicted[positions, label])
    cost = -T.sum(log_probs)

    params = P.values()
    gradients = T.grad(cost, wrt=params)