import theano
import theano.tensor as T


def __theano_build__(self):
    W_xh, W_hy, W_hh, b_h, b_y = self.params
    x = T.ivector('x')
    y = T.ivector('y')

    def forward_prop_step(x_t, h_t_prev, W_xh, W_hy, W_hh, b_h, b_y):
        # W_xh[:, x_t] selects the input column for token x_t (one-hot trick)
        h_t = T.tanh(W_xh[:, x_t] + T.dot(W_hh, h_t_prev) + b_h)
        o_t = T.nnet.softmax(T.dot(W_hy, h_t) + b_y)
        return [o_t[0], h_t]

    h_0 = T.zeros(self.n_hidden)
    [o, h], _ = theano.scan(forward_prop_step,
                            sequences=x,
                            outputs_info=[None, dict(initial=h_0)],
                            non_sequences=[W_xh, W_hy, W_hh, b_h, b_y],
                            truncate_gradient=self.bptt_truncate,
                            strict=True)

    prediction = T.argmax(o, axis=1)
    learning_rate = T.scalar('learning_rate')
    self.cost = T.sum(T.nnet.categorical_crossentropy(o, y))
    self.gparams, self.updates = gradientUpdates(self.cost, self.params, learning_rate)

    # Assign functions
    self.forward_propagation = theano.function([x], o)
    self.predict = theano.function([x], prediction)
    self.ce_error = theano.function([x, y], self.cost)
    self.bptt = theano.function([x, y], self.gparams)
    # SGD
    self.sgd_step = theano.function([x, y, learning_rate], [], updates=self.updates)
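
# All five builders call a gradientUpdates helper that is not defined in this
# section. A minimal sketch of what it presumably returns, assuming plain SGD
# (the real helper may clip gradients or use a different optimizer):
def gradientUpdates(cost, params, learning_rate):
    # One symbolic gradient per parameter, and one (param, param - lr * grad)
    # pair for the updates argument of theano.function.
    gparams = [T.grad(cost, param) for param in params]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(params, gparams)]
    return gparams, updates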
def __theano_build__(self):
    E, W_x, W_h, W_y, b, b_y = self.params
    x = T.ivector('x')
    y = T.ivector('y')

    def forward_prop_step(x_t, h1_t_prev, c1_t_prev, h2_t_prev, c2_t_prev,
                          E, W_x, W_h, W_y, b, b_y):
        # Layer 1: input, forget, output gates and candidate cell state
        x_layer = E[:, x_t]
        i1_t = T.nnet.sigmoid(T.dot(W_x[0], x_layer) + T.dot(W_h[0], h1_t_prev) + b[0])
        f1_t = T.nnet.sigmoid(T.dot(W_x[1], x_layer) + T.dot(W_h[1], h1_t_prev) + b[1])
        o1_t = T.nnet.sigmoid(T.dot(W_x[2], x_layer) + T.dot(W_h[2], h1_t_prev) + b[2])
        c1_t_cap = T.tanh(T.dot(W_x[3], x_layer) + T.dot(W_h[3], h1_t_prev) + b[3])
        c1_t = i1_t * c1_t_cap + f1_t * c1_t_prev
        h1_t = o1_t * T.tanh(c1_t)

        # Layer 2: takes layer 1's hidden state as its input
        x_layer = h1_t
        i2_t = T.nnet.sigmoid(T.dot(W_x[4], x_layer) + T.dot(W_h[4], h2_t_prev) + b[4])
        f2_t = T.nnet.sigmoid(T.dot(W_x[5], x_layer) + T.dot(W_h[5], h2_t_prev) + b[5])
        o2_t = T.nnet.sigmoid(T.dot(W_x[6], x_layer) + T.dot(W_h[6], h2_t_prev) + b[6])
        c2_t_cap = T.tanh(T.dot(W_x[7], x_layer) + T.dot(W_h[7], h2_t_prev) + b[7])
        c2_t = i2_t * c2_t_cap + f2_t * c2_t_prev
        h2_t = o2_t * T.tanh(c2_t)

        # The output layer reads the top layer's hidden state h2_t
        y_t = T.nnet.softmax(T.dot(W_y, h2_t) + b_y)
        return [y_t[0], h1_t, c1_t, h2_t, c2_t]

    h1_0 = T.zeros(self.n_hidden)
    c1_0 = T.zeros(self.n_hidden)
    h2_0 = T.zeros(self.n_hidden)
    c2_0 = T.zeros(self.n_hidden)
    [o, h1, c1, h2, c2], _ = theano.scan(
        forward_prop_step,
        sequences=x,
        outputs_info=[None, dict(initial=h1_0), dict(initial=c1_0),
                      dict(initial=h2_0), dict(initial=c2_0)],
        non_sequences=[E, W_x, W_h, W_y, b, b_y],
        truncate_gradient=self.bptt_truncate,
        strict=True)

    prediction = T.argmax(o, axis=1)
    learning_rate = T.scalar('learning_rate')
    self.cost = T.sum(T.nnet.categorical_crossentropy(o, y))
    self.gparams, self.updates = gradientUpdates(self.cost, self.params, learning_rate)

    # Assign functions
    self.forward_propagation = theano.function([x], o)
    self.predict = theano.function([x], prediction)
    self.ce_error = theano.function([x, y], self.cost)
    self.bptt = theano.function([x, y], self.gparams)
    # SGD
    self.sgd_step = theano.function([x, y, learning_rate], [], updates=self.updates)
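
# Hypothetical initializer matching the parameter layout unpacked above: the
# eight gate matrices (i, f, o, c for layer 1, then layer 2) are stacked along
# axis 0 of W_x and W_h. This sketch assumes the embedding size equals
# n_hidden so all eight W_x slices share one shape; the actual code may differ.
import numpy as np


def init_stacked_lstm_params(vocab_size, n_hidden, rng=np.random):
    def uniform(shape):
        bound = np.sqrt(1.0 / shape[-1])
        return theano.shared(
            rng.uniform(-bound, bound, shape).astype(theano.config.floatX))

    E = uniform((n_hidden, vocab_size))       # word embeddings, read as E[:, x_t]
    W_x = uniform((8, n_hidden, n_hidden))    # input weights, one slice per gate
    W_h = uniform((8, n_hidden, n_hidden))    # recurrent weights, one slice per gate
    W_y = uniform((vocab_size, n_hidden))     # output projection
    b = theano.shared(np.zeros((8, n_hidden), dtype=theano.config.floatX))
    b_y = theano.shared(np.zeros(vocab_size, dtype=theano.config.floatX))
    return [E, W_x, W_h, W_y, b, b_y]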
def __theano_build__(self):
    f_W_x, b_W_x, f_W_h, b_W_h, f_W_y, b_W_y, f_b, b_b, b_y = self.params
    f_x = T.ivector('f_x')
    b_x = f_x[::-1]  # the backward LSTM reads the same sequence reversed
    y = T.ivector('y')

    # Argument order must follow scan's convention: sequences, then the
    # recurrent states in outputs_info order (f_h, f_c, b_h, b_c), then
    # non_sequences.
    def forward_prop_step(f_x_t, b_x_t, f_h_t_prev, f_c_t_prev, b_h_t_prev, b_c_t_prev,
                          f_W_x, b_W_x, f_W_h, b_W_h, f_W_y, b_W_y, f_b, b_b, b_y):
        # Forward LSTM
        f_i_t = T.nnet.sigmoid(f_W_x[0, :, f_x_t] + T.dot(f_W_h[0], f_h_t_prev) + f_b[0])
        f_f_t = T.nnet.sigmoid(f_W_x[1, :, f_x_t] + T.dot(f_W_h[1], f_h_t_prev) + f_b[1])
        f_o_t = T.nnet.sigmoid(f_W_x[2, :, f_x_t] + T.dot(f_W_h[2], f_h_t_prev) + f_b[2])
        f_c_t_cap = T.tanh(f_W_x[3, :, f_x_t] + T.dot(f_W_h[3], f_h_t_prev) + f_b[3])
        f_c_t = f_i_t * f_c_t_cap + f_f_t * f_c_t_prev
        f_h_t = f_o_t * T.tanh(f_c_t)

        # Backward LSTM
        b_i_t = T.nnet.sigmoid(b_W_x[0, :, b_x_t] + T.dot(b_W_h[0], b_h_t_prev) + b_b[0])
        b_f_t = T.nnet.sigmoid(b_W_x[1, :, b_x_t] + T.dot(b_W_h[1], b_h_t_prev) + b_b[1])
        b_o_t = T.nnet.sigmoid(b_W_x[2, :, b_x_t] + T.dot(b_W_h[2], b_h_t_prev) + b_b[2])
        b_c_t_cap = T.tanh(b_W_x[3, :, b_x_t] + T.dot(b_W_h[3], b_h_t_prev) + b_b[3])
        b_c_t = b_i_t * b_c_t_cap + b_f_t * b_c_t_prev
        b_h_t = b_o_t * T.tanh(b_c_t)

        # Both directions are combined in the output layer
        y_t = T.nnet.softmax(T.dot(f_W_y, f_h_t) + T.dot(b_W_y, b_h_t) + b_y)
        return [y_t[0], f_h_t, f_c_t, b_h_t, b_c_t]

    f_h_0 = T.zeros(self.n_hidden)
    f_c_0 = T.zeros(self.n_hidden)
    b_h_0 = T.zeros(self.n_hidden)
    b_c_0 = T.zeros(self.n_hidden)
    [o, fh, fc, bh, bc], _ = theano.scan(
        forward_prop_step,
        sequences=[f_x, b_x],
        outputs_info=[None, dict(initial=f_h_0), dict(initial=f_c_0),
                      dict(initial=b_h_0), dict(initial=b_c_0)],
        non_sequences=[f_W_x, b_W_x, f_W_h, b_W_h, f_W_y, b_W_y, f_b, b_b, b_y],
        truncate_gradient=self.bptt_truncate,
        strict=True)

    prediction = T.argmax(o, axis=1)
    learning_rate = T.scalar('learning_rate')
    self.cost = T.sum(T.nnet.categorical_crossentropy(o, y))
    self.gparams, self.updates = gradientUpdates(self.cost, self.params, learning_rate)

    # Assign functions (f_x is the only symbolic input; b_x is derived from it)
    self.forward_propagation = theano.function([f_x], o)
    self.predict = theano.function([f_x], prediction)
    self.ce_error = theano.function([f_x, y], self.cost)
    self.bptt = theano.function([f_x, y], self.gparams)
    # SGD
    self.sgd_step = theano.function([f_x, y, learning_rate], [], updates=self.updates)
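
# Hypothetical smoke test for the bidirectional model; the class name and
# hyperparameters below are assumptions for illustration only:
#
#   model = BiLSTM(vocab_size=8000, n_hidden=128, bptt_truncate=-1)
#   sent = [0, 42, 7, 99, 1]                  # token ids (int32)
#   probs = model.forward_propagation(sent)   # (len(sent), vocab_size) softmax rows
#   print(model.predict(sent))                # argmax token id per position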
def __theano_build__(self):
    W_xz, W_xr, W_x_hcap, W_sz, W_sr, W_s_hcap, W_o, b_z, b_r, b_hcap, b_o = self.params
    x = T.ivector('x')
    y = T.ivector('y')

    def forward_prop_step(x_t, s_t_prev,
                          W_xz, W_xr, W_x_hcap, W_sz, W_sr, W_s_hcap, W_o,
                          b_z, b_r, b_hcap, b_o):
        # Update and reset gates
        z_t = T.nnet.sigmoid(W_xz[:, x_t] + T.dot(W_sz, s_t_prev) + b_z)
        r_t = T.nnet.sigmoid(W_xr[:, x_t] + T.dot(W_sr, s_t_prev) + b_r)
        # Candidate state, with the reset gate masking the previous state
        h_t_cap = T.tanh(W_x_hcap[:, x_t] + T.dot(W_s_hcap, (r_t * s_t_prev)) + b_hcap)
        # GRU update: s_t = (1 - z_t) * s_{t-1} + z_t * h_t_cap
        s_t = (T.ones_like(z_t) - z_t) * s_t_prev + z_t * h_t_cap
        o_t = T.nnet.softmax(T.dot(W_o, s_t) + b_o)
        return [o_t[0], s_t]

    h_0 = T.zeros(self.n_hidden)
    [o, h], _ = theano.scan(
        fn=forward_prop_step,
        sequences=x,
        outputs_info=[None, dict(initial=h_0)],
        non_sequences=[W_xz, W_xr, W_x_hcap, W_sz, W_sr, W_s_hcap, W_o,
                       b_z, b_r, b_hcap, b_o],
        truncate_gradient=self.bptt_truncate,
        strict=True)

    prediction = T.argmax(o, axis=1)
    learning_rate = T.scalar('learning_rate')
    self.cost = T.sum(T.nnet.categorical_crossentropy(o, y))
    self.gparams, self.updates = gradientUpdates(self.cost, self.params, learning_rate)

    # Assign functions
    self.forward_propagation = theano.function([x], o)
    self.predict = theano.function([x], prediction)
    self.ce_error = theano.function([x, y], self.cost)
    self.bptt = theano.function([x, y], self.gparams)
    # SGD
    self.sgd_step = theano.function([x, y, learning_rate], [], updates=self.updates)
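
# Gradient inspection through the compiled bptt function: a quick sketch for
# checking that truncated BPTT yields finite gradients and for watching
# vanishing/exploding magnitudes. Works for any of the models built here,
# assuming the shared parameters were created with name= set.
def check_gradients(model, x_seq, y_seq):
    grads = model.bptt(x_seq, y_seq)  # one numpy array per entry of model.params
    for param, grad in zip(model.params, grads):
        print('%s: max |grad| = %g' % (param.name, np.abs(grad).max()))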
def __theano_build__(self):
    W_x, W_h, W_y, b, b_y = self.params
    x = T.ivector('x')
    y = T.ivector('y')

    def forward_prop_step(x_t, h_t_prev, c_t_prev, W_x, W_h, W_y, b, b_y):
        # Input, forget, output gates and candidate cell state
        i_t = T.nnet.sigmoid(W_x[0, :, x_t] + T.dot(W_h[0], h_t_prev) + b[0])
        f_t = T.nnet.sigmoid(W_x[1, :, x_t] + T.dot(W_h[1], h_t_prev) + b[1])
        o_t = T.nnet.sigmoid(W_x[2, :, x_t] + T.dot(W_h[2], h_t_prev) + b[2])
        c_t_cap = T.tanh(W_x[3, :, x_t] + T.dot(W_h[3], h_t_prev) + b[3])
        c_t = i_t * c_t_cap + f_t * c_t_prev
        h_t = o_t * T.tanh(c_t)
        y_t = T.nnet.softmax(T.dot(W_y, h_t) + b_y)
        return [y_t[0], h_t, c_t]

    h_0 = T.zeros(self.n_hidden)
    c_0 = T.zeros(self.n_hidden)
    [o, h, c], _ = theano.scan(
        forward_prop_step,
        sequences=x,
        outputs_info=[None, dict(initial=h_0), dict(initial=c_0)],
        non_sequences=[W_x, W_h, W_y, b, b_y],
        truncate_gradient=self.bptt_truncate,
        strict=True)

    prediction = T.argmax(o, axis=1)
    learning_rate = T.scalar('learning_rate')
    self.cost = T.sum(T.nnet.categorical_crossentropy(o, y))
    self.gparams, self.updates = gradientUpdates(self.cost, self.params, learning_rate)

    # Assign functions
    self.forward_propagation = theano.function([x], o)
    self.predict = theano.function([x], prediction)
    self.ce_error = theano.function([x, y], self.cost)
    self.bptt = theano.function([x, y], self.gparams)
    # SGD
    self.sgd_step = theano.function([x, y, learning_rate], [], updates=self.updates)
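
# A minimal training loop over the compiled functions; the function name and
# the data preparation are assumptions for illustration. X_train and y_train
# are lists of int32 token-id vectors (inputs and next-token targets).
def train_sgd(model, X_train, y_train, learning_rate=0.005, n_epochs=10):
    for epoch in range(n_epochs):
        for x_seq, y_seq in zip(X_train, y_train):
            model.sgd_step(x_seq, y_seq, learning_rate)  # one update per sequence
        total = sum(model.ce_error(x_seq, y_seq)
                    for x_seq, y_seq in zip(X_train, y_train))
        n_words = sum(len(y_seq) for y_seq in y_train)
        print('epoch %d: mean cross-entropy %.4f' % (epoch, total / n_words))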