def get_training_model(Ws_s, bs_s, dropout=False, lambd=10.0, kappa=1.0):
    # Build a dual network, one for the real move, one for a fake random move
    # Train on a negative log likelihood of classifying the right move
    xc_s, xc_p = get_model(Ws_s, bs_s, dropout=dropout)
    xr_s, xr_p = get_model(Ws_s, bs_s, dropout=dropout)
    xp_s, xp_p = get_model(Ws_s, bs_s, dropout=dropout)

    # loss = -T.log(sigmoid(xc_p + xp_p)).mean()  # negative log likelihood
    # loss += -T.log(sigmoid(-xp_p - xr_p)).mean()  # negative log likelihood

    cr_diff = xc_p - xr_p
    loss_a = -T.log(sigmoid(cr_diff)).mean()

    cp_diff = kappa * (xc_p + xp_p)
    loss_b = -T.log(sigmoid(cp_diff)).mean()
    loss_c = -T.log(sigmoid(-cp_diff)).mean()

    # Add regularization terms
    reg = 0
    for x in Ws_s + bs_s:
        reg += lambd * (x ** 2).mean()

    loss = loss_a + loss_b + loss_c

    return xc_s, xr_s, xp_s, loss, reg, loss_a, loss_b, loss_c
def sample_gradient():
    print "differentiation"
    x, y = T.dscalars("x", "y")
    z = (x + 2 * y) ** 2

    # dz/dx
    gx = T.grad(z, x)
    fgx = theano.function([x, y], gx)
    print fgx(1.0, 1.0)

    # dz/dy
    gy = T.grad(z, y)
    fgy = theano.function([x, y], gy)
    print fgy(1.0, 1.0)

    # d{sigmoid(x)}/dx
    x = T.dscalar("x")
    sig = sigmoid(x)
    dsig = T.grad(sig, x)
    f = theano.function([x], dsig)
    print f(0.0)
    print f(1.0)

    # d{sigmoid(<x, w>)}/dx
    w = T.dscalar("w")
    sig = sigmoid(T.dot(x, w))
    dsig = T.grad(sig, x)
    f = theano.function([x, w], dsig)
    print f(1.0, 2.0)
    print f(3.0, 4.0)
    print
def lstm_output(self, y_prev, ch_prev):
    """Calculates info to pass to the next time step.
    ch_prev is a vector of size 2*hdim."""
    c_prev = ch_prev[:self.hdim]  # T.vector('c_prev')
    h_prev = ch_prev[self.hdim:]  # T.vector('h_prev')

    # gates (input, forget, output)
    i_t = sigmoid(T.dot(self.Ui, h_prev))
    f_t = sigmoid(T.dot(self.Uf, h_prev))
    o_t = sigmoid(T.dot(self.Uo, h_prev))

    # new memory cell
    c_new_t = T.tanh(T.dot(self.Uc, h_prev))

    # final memory cell
    c_t = f_t * c_prev + i_t * c_new_t

    # final hidden state
    h_t = o_t * T.tanh(c_t)

    # Input vector for softmax
    theta_t = T.dot(self.U, h_t) + self.b

    # Softmax prob vector
    y_hat_t = softmax(theta_t.T).T
    # Softmax wraps the output in another list
    # (specifically it outputs a 2-d row, not a 1-d column)
    # y_hat_t = y_hat_t[0]

    # Compute the new output label
    out_label = T.argmax(y_hat_t)

    # final joint state
    ch_t = T.concatenate([c_t, h_t])

    return (out_label, ch_t), scan_module.until(T.eq(out_label, self.out_end))
def forward(self, data, h):
    z = NNET.sigmoid(THT.dot(data, self.Wz) + THT.dot(h, self.Uz) + self.bz)
    r = NNET.sigmoid(THT.dot(data, self.Wr) + THT.dot(h, self.Ur) + self.br)
    c = THT.tanh(THT.dot(data, self.Wg) + THT.dot(r * h, self.Ug) + self.bg)
    out = (1 - z) * h + z * c
    return out
def make_ann(self, hidden_layers, lr):
    self.lrate = lr
    self.W = [theano.shared(rng.uniform(-0.1, 0.1, size=(784, hidden_layers[0])))]
    self.B = [theano.shared(rng.uniform(-0.1, 0.1, size=(hidden_layers[0],)))]
    innput = T.vector('innput')
    self.X = [Tann.sigmoid(T.dot(innput, self.W[0]) + self.B[0])]
    params = [self.W[0], self.B[0]]

    for n in range(1, len(hidden_layers)):
        # Finding the number of inputs and outputs for this layer
        n_in = hidden_layers[n - 1]
        n_out = hidden_layers[n]
        # Making bias and weights for the layer
        self.W.append(theano.shared(rng.uniform(-0.1, 0.1, size=(n_in, n_out))))
        self.B.append(theano.shared(rng.uniform(-0.1, 0.1, size=(n_out,))))
        self.X.append(Tann.sigmoid(T.dot(self.X[n - 1], self.W[n]) + self.B[n]))
        params.append(self.W[n])
        params.append(self.B[n])

    # Reconstruction error between the input and the last layer's activation
    error = T.sum((innput - self.X[-1]) ** 2)
    print(error)
    print(params)

    # Gradient-descent updates
    gradients = T.grad(error, params)
    backprop_acts = [(p, p - self.lrate * g) for p, g in zip(params, gradients)]
    self.predictor = theano.function([innput], self.X)
    self.trainer = theano.function([innput], error, updates=backprop_acts)
def __step(img, prev_bbox, prev_att, state):
    cx = (prev_bbox[:, 2] + prev_bbox[:, 0]) / 2.
    cy = (prev_bbox[:, 3] + prev_bbox[:, 1]) / 2.
    sigma = TT.exp(prev_att[:, 0]) * (max(img_col, img_row) / 2)
    fract = TT.exp(prev_att[:, 1])
    amplifier = TT.exp(prev_att[:, 2])
    eps = 1e-8

    abs_cx = (cx + 1) / 2. * (img_col - 1)
    abs_cy = (cy + 1) / 2. * (img_row - 1)
    abs_stride = (fract * (max(img_col, img_row) - 1)) * ((1. / (NUM_N - 1.)) if NUM_N > 1 else 0)

    FX, FY = __filterbank(abs_cx, abs_cy, abs_stride, sigma)
    unnormalized_mask = (FX.dimshuffle(0, 'x', 1, 'x', 2) * FY.dimshuffle(0, 1, 'x', 2, 'x')).sum(axis=2).sum(axis=1)
    mask = unnormalized_mask  # / (unnormalized_mask.sum(axis=2).sum(axis=1) + eps).dimshuffle(0, 'x', 'x')
    masked_img = (mask.dimshuffle(0, 'x', 1, 2) * img) * amplifier.dimshuffle(0, 'x', 'x', 'x')

    conv1 = conv2d(masked_img, conv1_filters, subsample=(conv1_stride, conv1_stride))
    act1 = TT.tanh(conv1)
    flat1 = TT.reshape(act1, (batch_size, conv1_output_dim))
    gru_in = TT.concatenate([flat1, prev_bbox], axis=1)
    gru_z = NN.sigmoid(TT.dot(gru_in, Wz) + TT.dot(state, Uz) + bz)
    gru_r = NN.sigmoid(TT.dot(gru_in, Wr) + TT.dot(state, Ur) + br)
    gru_h_ = TT.tanh(TT.dot(gru_in, Wg) + TT.dot(gru_r * state, Ug) + bg)
    gru_h = (1 - gru_z) * state + gru_z * gru_h_
    bbox = TT.tanh(TT.dot(gru_h, W_fc2) + b_fc2)
    att = TT.dot(gru_h, W_fc3) + b_fc3

    return bbox, att, gru_h, mask
def test_local_sigm_times_exp(self):
    """
    Test the `local_sigm_times_exp` optimization.
    exp(x) * sigm(-x) -> sigm(x)
    exp(-x) * sigm(x) -> sigm(-x)
    """
    def match(func, ops):
        # print [node.op.scalar_op for node in func.maker.fgraph.toposort()]
        assert [node.op for node in func.maker.fgraph.toposort()] == ops

    m = self.get_mode(excluding=['local_elemwise_fusion', 'inplace'])
    x, y = tensor.vectors('x', 'y')

    f = theano.function([x], sigmoid(-x) * tensor.exp(x), mode=m)
    match(f, [sigmoid])
    assert check_stack_trace(f, ops_to_check=sigmoid)

    f = theano.function([x], sigmoid(x) * tensor.exp(-x), mode=m)
    match(f, [tensor.neg, sigmoid])
    assert check_stack_trace(f, ops_to_check=sigmoid)

    f = theano.function([x], -(-(-(sigmoid(x)))) * tensor.exp(-x), mode=m)
    match(f, [tensor.neg, sigmoid, tensor.neg])
    # assert check_stack_trace(f, ops_to_check=sigmoid)

    f = theano.function(
        [x, y],
        (sigmoid(x) * sigmoid(-y) * -tensor.exp(-x) *
         tensor.exp(x * y) * tensor.exp(y)),
        mode=m)
    topo = f.maker.fgraph.toposort()
    for op, nb in [(sigmoid, 2), (tensor.mul, 2),
                   (tensor.neg, 1), (tensor.exp, 1)]:
        assert sum([n.op == op for n in topo]) == nb
def test_local_sigm_times_exp(self):
    """
    Test the `local_sigm_times_exp` optimization.
    exp(x) * sigm(-x) -> sigm(x)
    exp(-x) * sigm(x) -> sigm(-x)
    """
    def match(func, ops):
        # print [node.op.scalar_op for node in func.maker.fgraph.toposort()]
        assert [node.op for node in func.maker.fgraph.toposort()] == ops

    m = self.get_mode(excluding=['local_elemwise_fusion', 'inplace'])
    x, y = tensor.vectors('x', 'y')

    f = theano.function([x], sigmoid(-x) * tensor.exp(x), mode=m)
    match(f, [sigmoid])

    f = theano.function([x], sigmoid(x) * tensor.exp(-x), mode=m)
    match(f, [tensor.neg, sigmoid])

    f = theano.function([x], -(-(-(sigmoid(x)))) * tensor.exp(-x), mode=m)
    match(f, [tensor.neg, sigmoid, tensor.neg])

    f = theano.function(
        [x, y],
        (sigmoid(x) * sigmoid(-y) * -tensor.exp(-x) *
         tensor.exp(x * y) * tensor.exp(y)),
        mode=m)
    match(f, [sigmoid, tensor.mul, tensor.neg, tensor.exp,
              sigmoid, tensor.mul])
def test_log1msigm_to_softplus(self):
    x = T.matrix()

    out = T.log(1 - sigmoid(x))
    f = theano.function([x], out, mode=self.m)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 2
    assert isinstance(topo[0].op.scalar_op,
                      theano.tensor.nnet.sigm.ScalarSoftplus)
    assert isinstance(topo[1].op.scalar_op, theano.scalar.Neg)
    f(numpy.random.rand(54, 11).astype(config.floatX))

    # Same test with a flatten
    out = T.log(1 - T.flatten(sigmoid(x)))
    f = theano.function([x], out, mode=self.m)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 3
    assert isinstance(topo[0].op, T.Flatten)
    assert isinstance(topo[1].op.scalar_op,
                      theano.tensor.nnet.sigm.ScalarSoftplus)
    assert isinstance(topo[2].op.scalar_op, theano.scalar.Neg)
    f(numpy.random.rand(54, 11).astype(config.floatX))

    # Same test with a reshape
    out = T.log(1 - sigmoid(x).reshape([x.size]))
    f = theano.function([x], out, mode=self.m)
    topo = f.maker.fgraph.toposort()
    # assert len(topo) == 3
    assert any(isinstance(node.op, T.Reshape) for node in topo)
    assert any(isinstance(getattr(node.op, 'scalar_op', None),
                          theano.tensor.nnet.sigm.ScalarSoftplus)
               for node in topo)
    f(numpy.random.rand(54, 11).astype(config.floatX))
def build_custom_ann(self, layer_list, ann_type="rlu", nb=784):
    '''Build a feed-forward net with the given hidden layer sizes and activation type.'''
    layer_list = [nb] + layer_list
    input = T.dvector('input')
    target = T.wvector('target')
    w_list = []
    x_list = []

    # First hidden layer
    w_list.append(theano.shared(np.random.uniform(low=-.1, high=.1, size=(layer_list[0], layer_list[1]))))
    if ann_type == "rlu":
        x_list.append(T.switch(T.dot(input, w_list[0]) > 0, T.dot(input, w_list[0]), 0))
    elif ann_type == "sigmoid":
        x_list.append(Tann.sigmoid(T.dot(input, w_list[0])))
    elif ann_type == "ht":
        x_list.append(T.tanh(T.dot(input, w_list[0])))

    # Remaining hidden layers
    for count in range(0, len(layer_list) - 2):
        w_list.append(theano.shared(np.random.uniform(low=-.1, high=.1, size=(layer_list[count + 1], layer_list[count + 2]))))
        if ann_type == "rlu":
            x_list.append(T.switch(T.dot(x_list[count], w_list[count + 1]) > 0, T.dot(x_list[count], w_list[count + 1]), 0))
        elif ann_type == "sigmoid":
            x_list.append(Tann.sigmoid(T.dot(x_list[count], w_list[count + 1])))
        elif ann_type == "ht":
            x_list.append(T.tanh(T.dot(x_list[count], w_list[count + 1])))

    # Output layer (10 classes)
    w_list.append(theano.shared(np.random.uniform(low=-.1, high=.1, size=(layer_list[-1], 10))))
    x_list.append(T.switch(T.dot(x_list[-1], w_list[-1]) > 0, T.dot(x_list[-1], w_list[-1]), 0))

    error = T.sum(pow((target - x_list[-1]), 2))
    params = w_list
    gradients = T.grad(error, params)
    backprops = [(p, p - self.lrate * g) for p, g in zip(params, gradients)]

    self.trainer = theano.function(inputs=[input, target], outputs=error, updates=backprops, allow_input_downcast=True)
    self.predictor = theano.function(inputs=[input], outputs=x_list[-1], allow_input_downcast=True)
def scan_function(input, inter_output, W, U, Wz, Uz, Wr, Ur, buw, bz, br):
    rj = nnet.sigmoid(T.dot(input, Wr) + T.dot(inter_output, Ur) + br)
    zj = nnet.sigmoid(T.dot(input, Wz) + T.dot(inter_output, Uz) + bz)
    htilde = T.tanh(T.dot(input, W) + rj * T.dot(inter_output, U) + buw)
    inter_output = zj * inter_output + (1 - zj) * htilde
    return inter_output
def fp(self, x, _):
    relu = lambda v: T.maximum(v, 0)
    h = self.model.hiddens["h_%d" % self.hidden_id]['val']
    c = self.model.hiddens["c_%d" % self.hidden_id]['val']
    it = sigmoid(T.dot(x, self.Wxi) + T.dot(h, self.Whi) + T.dot(c, self.Wci) + self.Bi)
    ft = sigmoid(T.dot(x, self.Wxf) + T.dot(h, self.Whf) + T.dot(c, self.Wcf) + self.Bf)
    self.ct = ft * c + it * T.tanh(T.dot(x, self.Wxc) + T.dot(h, self.Whc) + self.Bc)
    ot = sigmoid(T.dot(x, self.Wxo) + T.dot(h, self.Who) + T.dot(self.ct, self.Wco) + self.Bo)
    self.output = ot * T.tanh(self.ct)
def gru_timestep(self, x_t, h_prev):
    Lx_t = self.L[:, x_t]
    # gates (update, reset)
    z_t = sigmoid(T.dot(self.Wz, Lx_t) + T.dot(self.Uz, h_prev))
    r_t = sigmoid(T.dot(self.Wr, Lx_t) + T.dot(self.Ur, h_prev))
    # combine them
    h_new_t = T.tanh(T.dot(self.Wh, Lx_t) + r_t * T.dot(self.Uh, h_prev))
    h_t = z_t * h_prev + (1 - z_t) * h_new_t
    return h_t
def rbm_ais_gibbs_for_v(rbmA_params, rbmB_params, beta, v_sample, seed=23098):
    """
    Parameters
    ----------
    rbmA_params: list
        Parameters of the baserate model (usually infinite temperature).
        List should be of length 3 and contain numpy.ndarrays corresponding
        to model parameters (weights, visbias, hidbias).
    rbmB_params: list
        Similar to rbmA_params, but for the model at temperature 1.
    beta: theano.shared
        Scalar, represents the inverse temperature at which we wish to
        sample from.
    v_sample: theano.shared
        Matrix of shape (n_runs, nvis), state of current particles.
    seed: int
        Optional seed parameter for sampling from binomial units.
    """
    (weights_a, visbias_a, hidbias_a) = rbmA_params
    (weights_b, visbias_b, hidbias_b) = rbmB_params
    theano_rng = RandomStreams(seed)

    # equation 15 (Salakhutdinov & Murray 2008)
    ph_a = nnet.sigmoid((1 - beta) * (tensor.dot(v_sample, weights_a) + hidbias_a))
    ha_sample = theano_rng.binomial(
        size=(v_sample.shape[0], len(hidbias_a)),
        n=1, p=ph_a, dtype=config.floatX)

    # equation 16 (Salakhutdinov & Murray 2008)
    ph_b = nnet.sigmoid(beta * (tensor.dot(v_sample, weights_b) + hidbias_b))
    hb_sample = theano_rng.binomial(
        size=(v_sample.shape[0], len(hidbias_b)),
        n=1, p=ph_b, dtype=config.floatX)

    # equation 17 (Salakhutdinov & Murray 2008)
    pv_act = (1 - beta) * (tensor.dot(ha_sample, weights_a.T) + visbias_a) + \
        beta * (tensor.dot(hb_sample, weights_b.T) + visbias_b)
    pv = nnet.sigmoid(pv_act)
    new_v_sample = theano_rng.binomial(
        size=(v_sample.shape[0], len(visbias_b)),
        n=1, p=pv, dtype=config.floatX)

    return new_v_sample
def get_reconstruction_cost(self, updates, pre_nv):
    '''Approximation to the reconstruction error.'''
    cross_entropy = T.mean(
        T.sum(self.inputs * T.log(sigmoid(pre_nv)) +
              (1 - self.inputs) * T.log(1 - sigmoid(pre_nv)),
              axis=1))
    return cross_entropy
def new_output(self, y_prev, h_prev):
    # gates (update, reset)
    z_t = sigmoid(T.dot(self.Uz, h_prev))
    r_t = sigmoid(T.dot(self.Ur, h_prev))
    # combine them
    h_new_t = T.tanh(r_t * T.dot(self.Uh, h_prev))
    h_t = z_t * h_prev + (1 - z_t) * h_new_t
    # compute new out_label
    y_hat_t = softmax((T.dot(self.U, h_t) + self.b).T).T
    out_label = T.argmax(y_hat_t)
    return (out_label, h_t), scan_module.until(T.eq(out_label, self.out_end))
def _step(x_, h_, c_):
    preact = tensor.dot(tensor.concatenate((h_, input_layer(x_, h_))), W)
    preact += b

    i = nnet.sigmoid(_slice(preact, 0, n_hidden))
    f = nnet.sigmoid(_slice(preact, 1, n_hidden))
    o = nnet.sigmoid(_slice(preact, 2, n_hidden))
    c = nnet.sigmoid(_slice(preact, 3, n_hidden))

    c = f * c_ + i * c
    h = o * tensor.tanh(c)

    return h, c
def _step(img, prev_bbox, state):
    # of (batch_size, nr_filters, some_rows, some_cols)
    conv1 = conv2d(img, conv1_filters, subsample=(conv1_stride, conv1_stride))
    act1 = TT.tanh(conv1)
    flat1 = TT.reshape(act1, (batch_size, conv1_output_dim))
    gru_in = TT.concatenate([flat1, prev_bbox], axis=1)
    gru_z = NN.sigmoid(TT.dot(gru_in, Wz) + TT.dot(state, Uz) + bz)
    gru_r = NN.sigmoid(TT.dot(gru_in, Wr) + TT.dot(state, Ur) + br)
    gru_h_ = TT.tanh(TT.dot(gru_in, Wg) + TT.dot(gru_r * state, Ug) + bg)
    gru_h = (1 - gru_z) * state + gru_z * gru_h_
    bbox = TT.tanh(TT.dot(gru_h, W_fc2) + b_fc2)
    return bbox, gru_h
def _step(x_, h_, c_):
    preact = tensor.dot(h_, U)
    preact += x_

    i = nnet.sigmoid(_slice(preact, 0, n_hidden))
    f = nnet.sigmoid(_slice(preact, 1, n_hidden))
    o = nnet.sigmoid(_slice(preact, 2, n_hidden))
    c = tensor.tanh(_slice(preact, 3, n_hidden))

    c = f * c_ + i * c
    h = o * tensor.tanh(c)

    return h, c
def build_ann(self, nb=784, nh=2, learning_rate=0.1):
    w1 = theano.shared(np.random.uniform(-.1, .1, size=(nb, nh)))
    w2 = theano.shared(np.random.uniform(-.1, .1, size=(nh, nb)))
    input = T.dvector('input')
    b1 = theano.shared(np.random.uniform(-.1, .1, size=nh))
    b2 = theano.shared(np.random.uniform(-.1, .1, size=nb))
    x1 = Tann.sigmoid(T.dot(input, w1) + b1)
    x2 = Tann.sigmoid(T.dot(x1, w2) + b2)
    error = T.sum((input - x2)**2)
    params = [w1, b1, w2, b2]
    gradients = T.grad(error, params)
    backprop_acts = [(p, p - learning_rate * g) for p, g in zip(params, gradients)]
    self.predictor = theano.function([input], [x2, x1])
    self.trainer = theano.function([input], error, updates=backprop_acts)
def dgru_output(self, x_t, old_label, h_prev):
    Lx_t = self.L[:, x_t]
    # gates (update, reset)
    z_t = sigmoid(T.dot(self.Wz, Lx_t) + T.dot(self.Uz, h_prev))
    r_t = sigmoid(T.dot(self.Wr, Lx_t) + T.dot(self.Ur, h_prev))
    # combine them
    h_new_t = T.tanh(T.dot(self.Wh, Lx_t) + r_t * T.dot(self.Uh, h_prev))
    h_t = z_t * h_prev + (1 - z_t) * h_new_t
    y_hat_t = softmax(T.dot(self.U, h_t) + self.b)[0]
    out_label = T.argmax(y_hat_t)
    return out_label, h_t
def _step(x_, m_, h_, c_):
    preact = tensor.dot(h_, U)
    preact += x_

    i = nnet.sigmoid(_slice(preact, 0, n_hidden))
    f = nnet.sigmoid(_slice(preact, 1, n_hidden))
    o = nnet.sigmoid(_slice(preact, 2, n_hidden))
    c = tensor.tanh(_slice(preact, 3, n_hidden))

    c = f * c_ + i * c
    c = m_[:, None] * c + (1. - m_)[:, None] * c_

    h = o * tensor.tanh(c)
    h = m_[:, None] * h + (1. - m_)[:, None] * h_

    return h, c
def build_ann(self, nb, nh):
    w1 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nb, nh)))
    w2 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh, 10)))
    input = T.fmatrix()
    target = T.fmatrix()
    x1 = Tann.sigmoid(T.dot(input, w1))
    x2 = Tann.sigmoid(T.dot(x1, w2))
    error = T.sum(pow((target - x2), 2))
    params = [w1, w2]
    gradients = T.grad(error, params)
    backprops = self.backprop_acts(params, gradients)
    self.get_x1 = theano.function(inputs=[input, target], outputs=error, allow_input_downcast=True)
    self.trainer = theano.function(inputs=[input, target], outputs=error, updates=backprops, allow_input_downcast=True)
    self.predictor = theano.function(inputs=[input], outputs=x2, allow_input_downcast=True)
def dgru_timestep(self, x_t, old_cost, h_prev, ys):
    Lx_t = self.L[:, x_t]
    # gates (update, reset)
    z_t = sigmoid(T.dot(self.Wz, Lx_t) + T.dot(self.Uz, h_prev))
    r_t = sigmoid(T.dot(self.Wr, Lx_t) + T.dot(self.Ur, h_prev))
    # combine them
    h_new_t = T.tanh(T.dot(self.Wh, Lx_t) + r_t * T.dot(self.Uh, h_prev))
    h_t = z_t * h_prev + (1 - z_t) * h_new_t
    y_hat_t = softmax((T.dot(self.U, h_t) + self.b).T).T
    cost = T.sum(-T.log(y_hat_t[ys, T.arange(ys.shape[0])]))
    # We don't divide yet by batch size
    new_cost = old_cost + cost
    return cost, h_t
def buildann(self, nb, nh, nob, lr):
    x = []
    # weights with initial random values between -0.1 and 0.1
    for i in range(len(nh)):
        if i == 0:
            self.w.append(theano.shared(np.random.uniform(-.1, .1, size=(nb, nh[i]))))
        if i != 0:
            self.w.append(theano.shared(np.random.uniform(-.1, .1, size=(nh[i - 1], nh[i]))))
        if i == len(nh) - 1:
            self.w.append(theano.shared(np.random.uniform(-.1, .1, size=(nh[i], nob))))

    # input is the image, label is the possible answers (0 to 9)
    input = T.dvector('input')
    label = T.dvector('label')

    # node biases with initial random values between -0.1 and 0.1
    for i in range(len(nh)):
        self.b.append(theano.shared(np.random.uniform(-.1, .1, size=nh[i])))
        if i == len(nh) - 1:
            self.b.append(theano.shared(np.random.uniform(-.1, .1, size=nob)))

    # activation functions
    for i in range(len(nh)):
        if i == 0:
            x.append(Tann.sigmoid(T.dot(input, self.w[i]) + self.b[i]))
        x.append(Tann.sigmoid(T.dot(x[i], self.w[i + 1]) + self.b[i + 1]))

    # error calculation, which gives the least error for right guesses
    error = T.sum((x[len(nh)] - label)**2)

    # parameters needed for the gradient search
    params = []
    for i in range(len(self.w)):
        params.append(self.w[i])
        params.append(self.b[i])

    # gradient search
    gradients = T.grad(error, params)

    # backpropagation for updating weights and biases
    backprop_acts = [(p, p - self.lrate * g) for p, g in zip(params, gradients)]

    # testing function
    self.predictor = theano.function([input], x[len(nh)])

    # training function
    self.trainer = theano.function([input, label], [x[len(nh)], error], updates=backprop_acts)
def __step(img, prev_bbox, state, timestep):
    conv1 = conv2d(img, conv1_filters, subsample=(conv1_stride, conv1_stride), border_mode='half')
    act1 = NN.relu(conv1)
    flat1 = TT.reshape(act1, (-1, conv1_output_dim))
    gru_in = TT.concatenate([flat1, prev_bbox], axis=1)
    gru_z = NN.sigmoid(TT.dot(gru_in, Wz) + TT.dot(state, Uz) + bz)
    gru_r = NN.sigmoid(TT.dot(gru_in, Wr) + TT.dot(state, Ur) + br)
    gru_h_ = TT.tanh(TT.dot(gru_in, Wg) + TT.dot(gru_r * state, Ug) + bg)
    gru_h = (1 - gru_z) * state + gru_z * gru_h_
    bbox = TT.tanh(TT.dot(gru_h, W_fc2) + b_fc2)

    bbox_cx = ((bbox[:, 2] + bbox[:, 0]) / 2 + 1) / 2 * img_row
    bbox_cy = ((bbox[:, 3] + bbox[:, 1]) / 2 + 1) / 2 * img_col
    bbox_w = TT.abs_(bbox[:, 2] - bbox[:, 0]) / 2 * img_row
    bbox_h = TT.abs_(bbox[:, 3] - bbox[:, 1]) / 2 * img_col
    x = TT.arange(img_row, dtype=T.config.floatX)
    y = TT.arange(img_col, dtype=T.config.floatX)
    mx = TT.maximum(TT.minimum(-TT.abs_(x.dimshuffle('x', 0) - bbox_cx.dimshuffle(0, 'x')) + bbox_w.dimshuffle(0, 'x') / 2., 1), 1e-4)
    my = TT.maximum(TT.minimum(-TT.abs_(y.dimshuffle('x', 0) - bbox_cy.dimshuffle(0, 'x')) + bbox_h.dimshuffle(0, 'x') / 2., 1), 1e-4)
    bbox_mask = mx.dimshuffle(0, 1, 'x') * my.dimshuffle(0, 'x', 1)

    new_cls1_f = cls_f
    new_cls1_b = cls_b

    mask = act1 * bbox_mask.dimshuffle(0, 'x', 1, 2)

    new_featmaps = TG.disconnected_grad(TT.set_subtensor(featmaps[:, timestep], mask))
    new_featmaps.name = 'new_featmaps'
    new_probmaps = TG.disconnected_grad(TT.set_subtensor(probmaps[:, timestep], bbox_mask))
    new_probmaps.name = 'new_probmaps'

    train_featmaps = TG.disconnected_grad(new_featmaps[:, :timestep + 1].reshape(((timestep + 1) * batch_size, conv1_nr_filters, img_row, img_col)))
    train_featmaps.name = 'train_featmaps'
    train_probmaps = TG.disconnected_grad(new_probmaps[:, :timestep + 1])
    train_probmaps.name = 'train_probmaps'

    for _ in range(0, 5):
        train_convmaps = conv2d(train_featmaps, new_cls1_f, subsample=(cls1_stride, cls1_stride), border_mode='half').reshape((batch_size, timestep + 1, batch_size, img_row, img_col))
        train_convmaps.name = 'train_convmaps'
        train_convmaps_selected = train_convmaps[TT.arange(batch_size).repeat(timestep + 1), TT.tile(TT.arange(timestep + 1), batch_size), TT.arange(batch_size).repeat(timestep + 1)].reshape((batch_size, timestep + 1, img_row, img_col))
        train_convmaps_selected.name = 'train_convmaps_selected'
        train_predmaps = NN.sigmoid(train_convmaps_selected + new_cls1_b.dimshuffle(0, 'x', 'x', 'x'))
        train_loss = NN.binary_crossentropy(train_predmaps, train_probmaps).mean()
        train_grad_cls1_f, train_grad_cls1_b = T.grad(train_loss, [new_cls1_f, new_cls1_b])
        new_cls1_f -= train_grad_cls1_f * 0.1
        new_cls1_b -= train_grad_cls1_b * 0.1

    return (bbox, gru_h, timestep + 1, mask, bbox_mask), \
           {cls_f: TG.disconnected_grad(new_cls1_f),
            cls_b: TG.disconnected_grad(new_cls1_b),
            featmaps: TG.disconnected_grad(new_featmaps),
            probmaps: TG.disconnected_grad(new_probmaps)}
def __init__(self, input=tensor.dvector('input'), target=tensor.dvector('target'),
             n_input=1, n_hidden=1, n_output=1, lr=1e-3, **kw):
    super(NNet, self).__init__(**kw)

    self.input = input
    self.target = target
    self.lr = shared(lr, 'learning_rate')
    self.w1 = shared(numpy.zeros((n_hidden, n_input)), 'w1')
    self.w2 = shared(numpy.zeros((n_output, n_hidden)), 'w2')
    # print self.lr.type

    self.hidden = sigmoid(tensor.dot(self.w1, self.input))
    self.output = tensor.dot(self.w2, self.hidden)
    self.cost = tensor.sum((self.output - self.target)**2)

    self.sgd_updates = {
        self.w1: self.w1 - self.lr * tensor.grad(self.cost, self.w1),
        self.w2: self.w2 - self.lr * tensor.grad(self.cost, self.w2)}

    self.sgd_step = pfunc(
        params=[self.input, self.target],
        outputs=[self.output, self.cost],
        updates=self.sgd_updates)

    self.compute_output = pfunc([self.input], self.output)

    self.output_from_hidden = pfunc([self.hidden], self.output)
def get_model(Ws, bs, dropout=False):
    v = T.matrix('input')
    m = T.matrix('missing')
    q = T.matrix('target')
    k = T.vector('normalization factor')

    # Set all missing/target values to 0.5
    keep_mask = (1 - m) * (1 - q)
    h = keep_mask * (v * 2 - 1)  # Convert to +1, -1

    # Normalize layer 0
    h *= k.dimshuffle(0, 'x')

    for l in xrange(len(Ws)):
        h = T.dot(h, Ws[l]) + bs[l]

        if l < len(Ws) - 1:
            h = h * (h > 0)  # relu
            if dropout:
                mask = srng.binomial(n=1, p=0.5, size=h.shape)
                h = h * mask * 2

    output = sigmoid(h)
    LL = v * T.log(output) + (1 - v) * T.log(1 - output)
    # loss = -(q * LL).sum() / q.sum()
    loss = -((1 - m) * LL).sum() / (1 - m).sum()

    return v, m, q, k, output, loss
def makelayer(X, input_size, output_size):
    w = np.random.randn(input_size + 1, output_size)
    W = theano.shared(np.asarray(w, dtype=theano.config.floatX))
    bias = np.asarray(np.random.randn(1), dtype=theano.config.floatX)
    B = theano.shared(bias)
    new_X = T.concatenate([X, B])
    return nnet.sigmoid(T.dot(W.T, new_X)), W, B
def get_update(Ws_s, bs_s):
    x, fx = train.get_model(Ws_s, bs_s)

    # Ground truth (who won)
    y = T.vector('y')

    # Compute loss (just log likelihood of a sigmoid fit)
    y_pred = sigmoid(fx)
    loss = -(y * T.log(y_pred) + (1 - y) * T.log(1 - y_pred)).mean()

    # Metrics on the number of correctly predicted ones
    frac_correct = ((fx > 0) * y + (fx < 0) * (1 - y)).mean()

    # Updates
    learning_rate_s = T.scalar(dtype=theano.config.floatX)
    momentum_s = T.scalar(dtype=theano.config.floatX)
    updates = train.nesterov_updates(loss, Ws_s + bs_s, learning_rate_s, momentum_s)

    f_update = theano.function(
        inputs=[x, y, learning_rate_s, momentum_s],
        outputs=[loss, frac_correct],
        updates=updates,
    )

    return f_update
import numpy as np
from theano import shared, function
import theano.tensor as T
from theano.tensor.nnet import sigmoid

# Refer to ex02 for more on Theano.

# Model:
x = T.matrix()
W = shared(0.01 * np.random.randn(784, 10))
b = shared(np.zeros(10))
y = sigmoid(T.dot(x, W) + b)

# cost
target = T.matrix()
cost = T.mean((y - target)**2)

# Alternatively, you can use the following,
# which adds some regularization
cost = T.mean((y - target)**2) + 0.0001 * T.sum(W**2)

# Functions to use model:
feedforward = function([x], y)
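# A minimal sketch (an addition, not part of the original script) showing how the
# regularized `cost` above could drive one gradient-descent step; the learning
# rate 0.5 and the name `train` are illustrative assumptions.
gW, gb = T.grad(cost, [W, b])
train = function([x, target], cost,
                 updates=[(W, W - 0.5 * gW), (b, b - 0.5 * gb)])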
def layer(x, w):
    b = np.array([1], dtype=theano.config.floatX)
    new_x = T.concatenate([x, b])
    m = T.dot(w.T, new_x)  # theta1: 3x3 * x: 3x1 = 3x1 ;;; theta2: 1x4 * 4x1
    h = nnet.sigmoid(m)
    return h
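# A hedged usage sketch for layer() above, assuming the shapes hinted at by the
# inline comment (theta1 is 3x3 for a 2-unit input plus bias, theta2 is 4x1 for
# the 3-unit hidden layer plus bias); all names below are illustrative, not from
# the original code.
import numpy as np
import theano
import theano.tensor as T

x_in = T.dvector('x_in')
theta1 = theano.shared(np.array(np.random.rand(3, 3), dtype=theano.config.floatX))
theta2 = theano.shared(np.array(np.random.rand(4, 1), dtype=theano.config.floatX))
hid1 = layer(x_in, theta1)             # hidden activations, shape (3,)
out1 = T.sum(layer(hid1, theta2))      # scalar network output
run = theano.function(inputs=[x_in], outputs=out1)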
def __init__(self, rng, input, filter_shape, poolsize=(2, 2), stride=None,
             if_pool=False, act=None, share_with=None, tied=None,
             border_mode='valid'):
    self.input = input

    if share_with:
        self.W = share_with.W
        self.b = share_with.b

        self.W_delta = share_with.W_delta
        self.b_delta = share_with.b_delta

    elif tied:
        self.W = tied.W.dimshuffle(1, 0, 2, 3)
        self.b = tied.b

        self.W_delta = tied.W_delta.dimshuffle(1, 0, 2, 3)
        self.b_delta = tied.b_delta

    else:
        fan_in = np.prod(filter_shape[1:])
        poolsize_size = np.prod(poolsize) if poolsize else 1
        fan_out = (filter_shape[0] * np.prod(filter_shape[2:]) / poolsize_size)
        W_bound = np.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(
            np.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX
            ),
            borrow=True
        )
        b_values = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        self.W_delta = theano.shared(
            np.zeros(filter_shape, dtype=theano.config.floatX),
            borrow=True
        )
        self.b_delta = theano.shared(value=b_values, borrow=True)

    conv_out = nnet.conv2d(
        input=input,
        filters=self.W,
        filter_shape=filter_shape,
        border_mode=border_mode)

    # if poolsize:
    if if_pool:
        pooled_out = downsample.max_pool_2d(
            input=conv_out,
            ds=poolsize,
            st=stride,
            ignore_border=True)
        tmp = pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')
    else:
        tmp = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')

    if act == ConvolutionLayer.ACT_TANH:
        self.output = T.tanh(tmp)
    elif act == ConvolutionLayer.ACT_SIGMOID:
        self.output = nnet.sigmoid(tmp)
    elif act == ConvolutionLayer.ACT_ReLu:
        self.output = tmp * (tmp > 0)
    elif act == ConvolutionLayer.ACT_SoftPlus:
        self.output = T.log2(1 + T.exp(tmp))
    else:
        self.output = tmp

    # store parameters of this layer
    self.params = [self.W, self.b]
    self.deltas = [self.W_delta, self.b_delta]
def set_output(self):
    self._output = sigmoid(self._prev_layer.output)
def __theano_train__(self, n_in, n_hidden):
    """
    Run through the training sequence once during the training phase.
    """
    uidx = T.iscalar()
    msk = T.imatrix()
    dist_pos = T.fmatrix()
    dist_neg = T.fmatrix()
    seq_n, seq_len = msk.shape  # 315 x 315

    tu = self.t[uidx]  # (20, )
    xpidxs = self.tra_buys_masks[uidx]  # (1264, )
    xqidxs = self.tra_buys_neg_masks[uidx]  # (1264, )
    gps = self.g[xpidxs[:seq_len]]  # (315, 20)
    hps, hqs = self.h[xpidxs[1:seq_len + 1]], self.h[xqidxs[1:seq_len + 1]]  # (315, 20)
    zps, zqs = self.z[xpidxs[1:seq_len + 1]], self.z[xqidxs[1:seq_len + 1]]

    guiq_pqs = Unique(False, False, False)(xpidxs)
    uiq_g = self.g[guiq_pqs]
    pqs = T.concatenate((xpidxs, xqidxs))
    uiq_pqs = Unique(False, False, False)(pqs)
    uiq_h = self.h[uiq_pqs]
    uiq_z = self.z[uiq_pqs]

    t_z = T.sum(tu * zps, 1)  # (315, )
    n_h = T.sum(msk, 1)  # (315, )
    expand_g = gps.reshape((1, seq_len, n_hidden)) * msk.reshape((seq_n, seq_len, 1))  # (315, 315, 20)
    sp = T.sum(
        T.sum(expand_g * hps.reshape((seq_n, 1, n_hidden)), 2) * self.f_d(dist_pos),
        1) / n_h + t_z  # [(315, 315) * (315, 315)] -> (315, ) / (315, ) + (315, )
    sq = T.sum(
        T.sum(expand_g * hqs.reshape((seq_n, 1, n_hidden)), 2) * self.f_d(dist_neg),
        1) / n_h + t_z
    # sp = T.sum(T.sum(expand_g * hps.reshape((seq_n, 1, n_hidden)), 2), 1) / n_h + t_z
    # sq = T.sum(T.sum(expand_g * hqs.reshape((seq_n, 1, n_hidden)), 2), 1) / n_h + t_z
    loss = T.sum(T.log(sigmoid(sp - sq)))

    # ----------------------------------------------------------------------------
    # cost, gradients, learning rate, l2 regularization
    lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
    seq_l2_sq = T.sum([T.sum(par ** 2) for par in [gps, hps, hqs, zps, zqs]])
    seq_costs = (-loss + 0.5 * l2 * seq_l2_sq)
    seq_grads = T.grad(seq_costs, self.params)
    seq_updates = [(par, par - lr * gra) for par, gra in zip(self.params, seq_grads)]
    update_g = T.set_subtensor(uiq_g, uiq_g - lr * T.grad(seq_costs, self.g)[guiq_pqs])
    update_h = T.set_subtensor(uiq_h, uiq_h - lr * T.grad(seq_costs, self.h)[uiq_pqs])
    update_t = T.set_subtensor(tu, tu - lr * T.grad(seq_costs, self.t)[uidx])
    update_z = T.set_subtensor(uiq_z, uiq_z - lr * T.grad(seq_costs, self.z)[uiq_pqs])
    seq_updates.append((self.g, update_g))
    seq_updates.append((self.h, update_h))
    seq_updates.append((self.t, update_t))
    seq_updates.append((self.z, update_z))

    # ----------------------------------------------------------------------------
    # Given the positive/negative sample sequences and the other inputs,
    # update the variables and return the loss.
    self.seq_train = theano.function(
        inputs=[uidx, dist_pos, dist_neg, msk],
        outputs=loss,
        updates=seq_updates)
def test_perform_sigm_times_exp(self):
    """
    Test the core function doing the `sigm_times_exp` optimization.

    It is easier to test different graph scenarios this way than by
    compiling a theano function.
    """
    x, y, z, t = tensor.vectors('x', 'y', 'z', 't')
    exp = tensor.exp

    def ok(expr1, expr2):
        trees = [parse_mul_tree(e) for e in (expr1, expr2)]
        perform_sigm_times_exp(trees[0])
        trees[0] = simplify_mul(trees[0])
        good = theano.gof.graph.is_same_graph(
            compute_mul(trees[0]),
            compute_mul(trees[1]))
        if not good:
            print(trees[0])
            print(trees[1])
            print('***')
            theano.printing.debugprint(compute_mul(trees[0]))
            print('***')
            theano.printing.debugprint(compute_mul(trees[1]))
        assert good

    ok(sigmoid(x) * exp(-x), sigmoid(-x))
    ok(-x * sigmoid(x) * (y * (-1 * z) * exp(-x)),
       -x * sigmoid(-x) * (y * (-1 * z)))
    ok(-sigmoid(-x) *
       (exp(y) * (-exp(-z) * 3 * -exp(x)) *
        (y * 2 * (-sigmoid(-y) * (z + t) * exp(z)) * sigmoid(z))) *
       -sigmoid(x),
       sigmoid(x) *
       (-sigmoid(y) * (-sigmoid(-z) * 3) * (y * 2 * ((z + t) * exp(z)))) *
       -sigmoid(x))
    ok(exp(-x) * -exp(-x) * (-sigmoid(x) * -sigmoid(x)),
       -sigmoid(-x) * sigmoid(-x))
    ok(-exp(x) * -sigmoid(-x) * -exp(-x),
       -sigmoid(-x))
def forward(self, x):
    return nnet.sigmoid(x)