def test_gpujoin_gpualloc():
    """Check the alloc/join ops found in CPU and GPU compiled graphs.

    The expression ``join(0, zeros_like(a), ones_like(b))`` is compiled three
    times: with ``mode_without_gpu`` (plus 4), with ``mode_with_gpu``, and
    with ``mode_with_gpu`` (plus 4).  Each compiled graph must hold exactly
    two alloc nodes and one join node of the kind matching its mode, and the
    two ``+ 4`` variants must return numerically close results.
    """
    def count_nodes(fn, op):
        # Number of nodes in the compiled graph whose op compares equal to op.
        return sum(node.op == op for node in fn.maker.env.toposort())

    def joined():
        # Fresh copy of the expression under test.
        return T.join(0, T.zeros_like(a), T.ones_like(b))

    a = T.fmatrix('a')
    b = T.fmatrix('b')
    # Draw a_val before b_val so the random stream is consumed in the same order.
    a_val = numpy.asarray(numpy.random.rand(4, 5), dtype='float32')
    b_val = numpy.asarray(numpy.random.rand(3, 5), dtype='float32')

    f = theano.function([a, b], joined() + 4, mode=mode_without_gpu)
    f_gpu = theano.function([a, b], joined(), mode=mode_with_gpu)
    f_gpu2 = theano.function([a, b], joined() + 4, mode=mode_with_gpu)

    assert count_nodes(f, T.alloc) == 2
    assert count_nodes(f, T.join) == 1
    assert count_nodes(f_gpu, B.gpu_alloc) == 2
    assert count_nodes(f_gpu, B.gpu_join) == 1
    assert count_nodes(f_gpu2, B.gpu_alloc) == 2
    assert count_nodes(f_gpu2, B.gpu_join) == 1
    assert numpy.allclose(f(a_val, b_val), f_gpu2(a_val, b_val))
def apply(self, input_vars):
    """Return ``-log(self.vec.num_types)`` broadcast over the first input.

    Only ``input_vars[0]`` is read.  When it is 1-dimensional the result has
    its shape; otherwise the result has the shape of its first column
    (``input_vars[0][:, 0]``).
    """
    first = input_vars[0]
    template = first if first.ndim == 1 else first[:, 0]
    ones = T.ones_like(template)
    return -np.log(self.vec.num_types) * ones
def _cdf(self, para, X):
    """Evaluate ``self._margin`` on a padded, cumulative threshold grid.

    Reads ``para['b']``, ``para['d']`` and ``para['s']`` (their ``.value``)
    and the result of ``self._z(para, X)``.  The thresholds are the running
    sum along axis 1 of ``[b, d**2]``, padded with a ``-1e20`` column on the
    left and a ``1e20`` column on the right.
    """
    z = self._z(para, X)
    b_col = para['b'].value
    d = para['d'].value
    s = para['s'].value
    b_col = b_col.dimshuffle(0, 'x')

    # Cumulative thresholds: b, b + d0^2, b + d0^2 + d1^2, ...
    stacked = TT.concatenate((b_col, TT.sqr(d)), axis=1)
    thresholds = TT.extra_ops.cumsum(stacked, axis=1)

    # Pad both ends with very large magnitudes acting as -inf / +inf.
    low_pad = -1e20 * TT.ones_like(b_col)
    high_pad = 1e20 * TT.ones_like(b_col)
    thresholds = TT.concatenate((low_pad, thresholds, high_pad), axis=1)
    thresholds = thresholds.dimshuffle('x', 0, 1)

    # Repeat z along a new trailing axis, once per threshold.
    z_rep = TT.extra_ops.repeat(z.dimshuffle(1, 0, 'x'), thresholds.shape[2], 2)
    s_bc = s.dimshuffle('x', 0, 'x')

    return self._margin(thresholds, TT.sqr(s_bc), z_rep)
def get_output(self, train=False):
    """Return the layer input multiplied by one of several block masks.

    ``masks[0]`` is all ones.  For each entry ``k`` of ``self.input_shapes``
    a further mask is built that is all ones except for a zeroed slice of
    width ``self.input_shapes[k]`` starting at the running sum of the
    preceding entries.

    When ``train`` is true the mask is picked at random with
    ``self.trng.random_integers``; otherwise the all-ones mask is used.

    Raises
    ------
    NotImplementedError
        If the detected rank is not 1, 2 or 3.
    """
    X = self.get_input(train)
    masks = [T.ones_like(X)]

    # A single running offset replaces the original nested scan over all j
    # that only acted when i == j.
    offset = 0
    for width in self.input_shapes:
        mask = T.ones_like(X)
        try:
            ishape = len(self.input_shapes[0])
        except TypeError:
            # The entry has no len() (e.g. a plain int): treat it as 1-D.
            ishape = [1]
        # NOTE(review): when the len() above succeeds, ``ishape`` is an int
        # and ``len(ishape)`` below raises TypeError, so only non-sequence
        # entries appear to work today -- confirm the intended rank test.
        if len(ishape) == 3:
            mask = T.set_subtensor(mask[:, :, offset:offset + width], 0)
        elif len(ishape) == 2:
            mask = T.set_subtensor(mask[:, offset:offset + width], 0)
        elif len(ishape) == 1:
            mask = T.set_subtensor(mask[offset:offset + width], 0)
        else:
            raise NotImplementedError()
        offset = offset + width
        masks += [mask]

    masked = T.stack(masks)
    if train:
        index = self.trng.random_integers(
            size=(1,), low=0, high=len(masks) - 1)[0]
    else:
        index = 0
    masked_output = X * masked[index]
    return masked_output
def step_fun(self):
    """Return the compiled single-step function, building it on first use.

    The compiled function takes ``inputs`` followed by one matrix per
    (layer, state) pair and evaluates ``self.step`` with all-ones masks.
    The result is cached in ``self._step_fun``.

    Raises
    ------
    NotImplementedError
        If the last gate has ``use_attention`` set.
    """
    if self._step_fun is None:
        inputs = T.matrix('inputs')
        states_tm1 = [T.matrix('state_%d_%d_tm1' % (layer, state))
                      for layer in range(self.n_layers)
                      for state in range(self.gate0.n_states)]
        if self.gates[-1].use_attention:
            # The code that used to follow this raise was unreachable and
            # has been removed.
            raise NotImplementedError('Stacked RNN with attention')
        self._step_fun = function(
            [inputs] + states_tm1,
            self.step(*([inputs, T.ones(inputs.shape[:-1])]
                        + states_tm1
                        + [T.ones_like(states_tm1[0])])),
            name='%s_step_fun' % self.name)
    return self._step_fun
def calc_CER(self, resultseq, targetseq, resultseq_mask=None, targetseq_mask=None):
    """
    Calculate the character error rate (CER) given ground truth 'targetseq'
    and CTC decoding output 'resultseq'.

    :param resultseq (T1, B)
    :param resultseq_mask (T1, B), defaults to all ones
    :param targetseq (T2, B)
    :param targetseq_mask (T2, B), defaults to all ones
    :return: (CER, TE, TG) where TE is the accumulated edit distance, TG the
             accumulated target length (sum of the target mask) and
             CER = TE / TG
    """
    if resultseq_mask is None:
        resultseq_mask = tensor.ones_like(resultseq)
    if targetseq_mask is None:
        targetseq_mask = tensor.ones_like(targetseq)

    def accumulate(res_row, tgt_row, res_mask_row, tgt_mask_row, err_sum, len_sum):
        # Sequence lengths are the mask sums; compare only the valid prefixes.
        res_len = tensor.cast(res_mask_row.sum(), 'int32')
        tgt_len = tensor.cast(tgt_mask_row.sum(), 'int32')
        dist = self._editdist(res_row[0:res_len], tgt_row[0:tgt_len])
        return err_sum + dist, len_sum + tgt_mask_row.sum()

    # Scan over the batch axis (the inputs are transposed to (B, T)).
    scan_out, _ = theano.scan(
        fn=accumulate,
        sequences=[resultseq.T, targetseq.T, resultseq_mask.T, targetseq_mask.T],
        outputs_info=[tensor.zeros(1), tensor.zeros(1)],
        name='calc_CER')
    TE = scan_out[0][-1]
    TG = scan_out[1][-1]
    CER = TE / TG
    return CER, TE, TG
def f(X):
    """
    Apply hard local winner-take-all on every row of a theano matrix.

    Parameters
    ----------
    X: theano matrix
        Matrix on whose rows LWTA will be applied.

    Notes
    -----
    ``block_size`` (number of units in each block) is not a parameter; it is
    read from the enclosing scope.  Within each block only the maximal entry
    is kept; on ties the entry with the highest position in the block wins.
    """
    n_rows = X.shape[0]
    n_cols = X.shape[1]
    n_blocks = n_cols // block_size
    blocks = X.reshape((n_rows, n_blocks, block_size))

    # 1.0 wherever an entry equals its block maximum.
    peak = blocks.max(axis=2).dimshuffle(0, 1, 'x') * T.ones_like(blocks)
    is_peak = T.cast(blocks >= peak, 'float32')

    # Break ties: weight each position by its 1-based index and keep the
    # largest weighted entry of each block.
    position = np.arange(1, block_size + 1)
    ranked = is_peak * position
    top_rank = ranked.max(axis=2).dimshuffle(0, 1, 'x') * T.ones_like(blocks)
    winner = T.cast(ranked >= top_rank, 'float32')

    kept = blocks * winner
    return kept.reshape((X.shape[0], X.shape[1]))
def build_model(self):
    """Build and compile the training and test functions.

    ``self.step`` is scanned over the ``x`` / ``y`` sequences.  The hidden
    state stored in each ``h['init']`` is carried over between calls: when
    the ``reset`` input equals 0 it is replaced by a row of the scanned
    hidden states (``-1 - backroll`` for training, ``-1`` for testing),
    otherwise by an all-ones tensor.

    Returns ``(train_model, test_model)``; the training function takes the
    extra input ``self.lr``.
    """
    print('\n... building the model with unroll=%d, backroll=%d'
          % (self.source.unroll, self.source.backroll))

    x = T.imatrix('x')
    y = T.imatrix('y')
    reset = T.scalar('reset')

    initial_states = [h['init'] for h in self.hiddens.values()]
    scan_outputs, _ = theano.scan(
        self.step,
        sequences=[x, y],
        outputs_info=[None] * 3 + initial_states)
    [losses, probs, errors, hids] = scan_outputs

    loss = losses.sum()
    # Entries where y equals 255 are not counted in the denominator.
    error = errors.sum() / T.cast((T.neq(y, 255).sum()), floatX)

    hidden_updates_train = []
    hidden_updates_test = []
    for h in self.hiddens.values():
        init = h['init']
        carried_train = ifelse(T.eq(reset, 0),
                               hids[-1 - self.source.backroll, :],
                               T.ones_like(init))
        carried_test = ifelse(T.eq(reset, 0),
                              hids[-1, :],
                              T.ones_like(init))
        hidden_updates_train.append((init, carried_train))
        hidden_updates_test.append((init, carried_test))

    updates = self.source.get_updates(loss, self.sgd_params)
    updates += hidden_updates_train

    rets = [loss, probs[-1, :], error]
    mode = theano.Mode(linker='cvm')
    train_model = theano.function(
        [x, y, reset, self.lr], rets, updates=updates, mode=mode)
    test_model = theano.function(
        [x, y, reset], rets, updates=hidden_updates_test, mode=mode)
    return train_model, test_model
def castray(ro, rd, shape_params, nprims, width, height):
    """March rays for a fixed number of steps and return a depth map.

    Starting at ``ro``, each step evaluates ``mapedit`` at the current point
    ``ro + rd * accumulated_distance`` and adds the returned distance to the
    accumulator.  After 25 steps the accumulated distance is reshaped to
    ``(width, height)``, divided by ``tmax`` (20.0), and set to zero wherever
    it is not below ``tmax``.

    The step-count / colour computation of the earlier version never
    reached the return value and has been removed, together with the unused
    ``tmin``, ``m`` and ``precis`` locals.
    """
    tmax = 20.0
    max_num_steps = 25

    # There is a sequence of distances d1, d2, ..., dn and the accumulated
    # distances d1, d1+d2, d1+d2+d3, ...  What we want for each ray is the
    # distance to the surface, i.e. something like
    # 0, 20, 25, 27, 28, 28, 28, 28, 28.

    # First step from the ray origin.
    # FIXME: reshape instead of multiplying by 0.
    dists = mapedit(ro + rd * 0, shape_params, nprims, width, height)
    accum_dists = T.reshape(dists, (width, height, 1))

    for _ in range(max_num_steps - 1):
        dists = mapedit(ro + rd * accum_dists, shape_params, nprims,
                        width, height)
        accum_dists = accum_dists + T.reshape(dists, (width, height, 1))

    last_depth = T.reshape(accum_dists, (width, height))
    depthmap = T.switch(last_depth < tmax,
                        last_depth / tmax,
                        T.zeros_like(last_depth))
    return depthmap
def forward_prop_step(x_t, s_t1_prev, s_t2_prev):
    '''
    One propagation step through the embedding, two stacked GRU layers and
    the softmax output.

    ``E``, ``U``, ``W``, ``b``, ``V`` and ``c`` are read from the enclosing
    scope.  Layer 1 uses ``U/W/b[0..2]`` and layer 2 uses ``U/W/b[3..5]``,
    in the order update gate, reset gate, candidate.

    Returns ``[o_t, s_t1, s_t2]``.
    '''
    def gru_layer(layer_in, state_prev, k):
        # k is the index of the update-gate weights of this layer.
        update = T.nnet.hard_sigmoid(
            U[k].dot(layer_in) + W[k].dot(state_prev) + b[k])
        reset = T.nnet.hard_sigmoid(
            U[k + 1].dot(layer_in) + W[k + 1].dot(state_prev) + b[k + 1])
        candidate = T.tanh(
            U[k + 2].dot(layer_in) + W[k + 2].dot(state_prev * reset) + b[k + 2])
        return (T.ones_like(update) - update) * candidate + update * state_prev

    # Word embedding layer
    x_e = E[:, x_t]

    s_t1 = gru_layer(x_e, s_t1_prev, 0)
    s_t2 = gru_layer(s_t1, s_t2_prev, 3)

    # Theano's softmax returns a matrix with one row, we only need the row
    o_t = T.nnet.softmax(V.dot(s_t2) + c)[0]
    return [o_t, s_t1, s_t2]
def _build_marginal_likelihood_logp(self, y, X, Xu, sigma):
    """Build the log marginal likelihood for the configured approximation.

    ``self.approx`` selects the diagonal term ``Lamd`` and the trace term:
    "FITC" uses ``clip(Kffd - Qffd, 0, inf) + sigma**2`` with no trace term;
    "VFE" uses ``sigma**2`` plus a trace correction; any other value (DTC)
    uses ``sigma**2`` with no trace term.

    Returns ``-(constant + logdet + quadratic + trace)``.
    """
    sigma2 = tt.square(sigma)
    Kuu = self.cov_func(Xu)
    Kuf = self.cov_func(Xu, X)
    Luu = cholesky(stabilize(Kuu))
    A = solve_lower(Luu, Kuf)
    Qffd = tt.sum(A * A, 0)

    trace = 0.0
    if self.approx == "FITC":
        Kffd = self.cov_func(X, diag=True)
        Lamd = tt.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
    else:
        # VFE and DTC share the same diagonal; only VFE adds a trace term.
        Lamd = tt.ones_like(Qffd) * sigma2
        if self.approx == "VFE":
            trace = ((1.0 / (2.0 * sigma2)) *
                     (tt.sum(self.cov_func(X, diag=True)) -
                      tt.sum(tt.sum(A * A, 0))))

    A_l = A / Lamd
    L_B = cholesky(tt.eye(Xu.shape[0]) + tt.dot(A_l, tt.transpose(A)))
    r = y - self.mean_func(X)
    r_l = r / Lamd
    c = solve_lower(L_B, tt.dot(A, r_l))

    constant = 0.5 * X.shape[0] * tt.log(2.0 * np.pi)
    logdet = 0.5 * tt.sum(tt.log(Lamd)) + tt.sum(tt.log(tt.diag(L_B)))
    quadratic = 0.5 * (tt.dot(r, r_l) - tt.dot(c, c))
    total = constant + logdet + quadratic + trace
    return -1.0 * total
def forward_prop_step(x_t, dropmask_t, s_1_prev, s_2_prev):
    """One step through the embedding, two GRU layers and the softmax.

    All weights (``E``, ``U_*``, ``W_*``, ``b_*``, ``V``, ``output_bias``)
    and ``regularization_type`` are read from the enclosing scope.  The
    input-to-hidden matrices are multiplied element-wise by ``dropmask_t``
    when drop-connect is selected, and by an all-ones mask otherwise.

    Returns ``[o_t, s_1, s_2]``.
    """
    float_x = theano.config.floatX

    # Word embedding layer
    x_e = E.dot(x_t.T)
    x_e = x_e.astype(float_x)

    if regularization_type == RegularizationType.DROP_CONNECT:
        drop_mask = dropmask_t
    else:
        drop_mask = T.ones_like(U_update[0].astype(float_x), dtype=float_x)

    def gru_layer(layer_in, state_prev):
        # NOTE(review): both layers use the index-0 weights; a second layer
        # would normally have its own parameters -- confirm this sharing is
        # intended and not a copy-paste slip.
        update_gate = T.nnet.hard_sigmoid(
            (drop_mask * U_update[0]).dot(layer_in)
            + W_update[0].dot(state_prev) + b_update[0])
        reset_gate = T.nnet.hard_sigmoid(
            (drop_mask * U_reset[0]).dot(layer_in)
            + W_reset[0].dot(state_prev) + b_reset[0])
        candidate = T.tanh(
            (drop_mask * U_candidate[0]).dot(layer_in)
            + W_candidate[0].dot(state_prev * reset_gate) + b_candidate[0])
        return ((T.ones_like(update_gate) - update_gate) * candidate
                + update_gate * state_prev)

    # GRU Layer 1 and GRU Layer 2
    s_1 = gru_layer(x_e, s_1_prev)
    s_2 = gru_layer(s_1, s_2_prev)

    # Theano's softmax returns a matrix with one row, we only need the row
    o_t = T.nnet.softmax(V.dot(s_2) + output_bias)[0]
    return [o_t, s_1, s_2]
def __init__(self, optimizer_params, model_obj=None, X=None, Y=None, Y_aux=[], top_loss=None, params=None): print "Compiling RPROP..." super(compileRPROP, self).__init__(model_obj, X, Y, Y_aux, top_loss, params) self.LRs = [] RPROP_updates = [] # Initialise shared variables for the Training algos for i, para in enumerate(self.params): if para in self.params[:i]: print "Detected RNN or shared param @index =", i else: self.LRs.append( theano.shared( np.float32(optimizer_params["initial_update_size"]) * np.ones(para.get_value().shape, dtype="float32"), name=para.name + str("_RPROP"), borrow=0, ) ) print "RPROP: missing backtracking handling " ###TODO ??? for param_i, grad_i, last_grad_i, pLR_i in zip(self.params, self.gradients, self.last_grads, self.LRs): # Commented code on next 4 lines is theano-incapable and just illustration!!! # if ((last_grad_i*grad_i) < -1e-9): # sign-change & significant magnitude of last two gradients # pLR_i_new = pLR_i * (1 - np.float32(RPROP_penalty)) # decrease this LR # elif ((last_grad_i*grad_i) > 1e-11): # no sign-change & and last two gradients were sufficiently big # pLR_i_new = pLR_i * (1 + np.float32(RPORP_gain)) # increase this LR # capping RPROP-LR inside [1e-7,2e-3] RPROP_updates.append( ( pLR_i, T.minimum( T.maximum( pLR_i * ( 1 - np.float32(optimizer_params["penalty"]) * ((last_grad_i * grad_i) < -1e-9) + np.float32(optimizer_params["gain"]) * ((last_grad_i * grad_i) > 1e-11) ), 1e-7 * T.ones_like(pLR_i), ), 2e-3 * T.ones_like(pLR_i), ), ) ) RPROP_updates.append( (param_i, param_i - pLR_i * grad_i / (T.abs_(grad_i) + 1e-6) - (self.weightdecay * param_i)) ) RPROP_updates.append((last_grad_i, grad_i)) self.step = theano.function( [self.X, self.Y] + self.Y_aux, [self.top_loss, self.loss_instance], updates=RPROP_updates, on_unused_input="warn", ) print " Compiling done - in %.3f s!" % (time.time() - self.t_init)
def __init__(self, gtype, alfa=0.02, ifreset=False, countmax=100):
    """Store the settings and create the all-ones gradient-sum tensors.

    ``gtype`` only serves as the template for ``T.ones_like``.
    """
    self._alfa = alfa
    # Running sum and its initial value both start as ones shaped like gtype.
    self._gradsum, self._gradsum_init = T.ones_like(gtype), T.ones_like(gtype)
    # Parameters for resetting the gradient sum.
    self._ifreset, self._counter, self._countmax = ifreset, 0, countmax
def sample(self, alpha, beta):
    """Return ``z_1 / (z_1 + z_2)`` from two draws of the parent sampler.

    ``z_1`` is drawn with ``(alpha, ones_like(alpha))`` and ``z_2`` with
    ``(beta, ones_like(beta))``.
    """
    parent_sample = super(BetaSample, self).sample
    z_1 = parent_sample(alpha, T.ones_like(alpha))
    z_2 = parent_sample(beta, T.ones_like(beta))
    total = z_1 + z_2
    return z_1 / total
def adaptive_mask(mask, black=0., ignore=0.5, white=1.):
    """Map a label mask onto three levels.

    The result starts at ``ignore`` everywhere.  Entries where ``mask`` is
    greater than ``MASK["IGNORE"]`` become ``white``; entries where ``mask``
    is less than ``MASK["BACKGROUND_RING"]`` then become ``black``.
    """
    levels = ignore * T.ones_like(mask, dtype=floatX)
    black_fill = black * T.ones_like(levels, dtype=floatX)
    white_fill = white * T.ones_like(levels, dtype=floatX)

    where_white = (mask > MASK["IGNORE"]).nonzero()
    where_black = (mask < MASK["BACKGROUND_RING"]).nonzero()

    # White is written first, so black wins if the two selections overlap.
    levels = T.set_subtensor(levels[where_white], white_fill[where_white])
    levels = T.set_subtensor(levels[where_black], black_fill[where_black])
    return levels
def get_model(self, X, Y, x_test):
    '''
    Gaussian Process Regression model.
    Reference: C.E. Rasmussen, "Gaussian Process for Machine Learning",
    MIT Press 2006

    Args:
        X: tensor matrix, training data
        Y: tensor matrix, training target
        x_test: tensor matrix, testing data

    Returns (in this order):
        K: prior cov matrix
        Ks: prior joint cov matrix
        Kss: prior cov matrix for testing data
        y_test_mu: predictive mean
        y_test_var: predictive variance (latent variance plus noise)
        log_likelihood: log marginal likelihood of the training data
        L: L = chol(K/sn2 + eye(n))
        alpha: alpha = inv(K/sn2 + eye(n)) * (Y - m) / sn2
        V: inv(L) * (sW * Ks)
        fs2: predictive latent variance
        sW: column vector with every entry equal to 1/sqrt(sn2)

    Note: the cov matrix inverse is computed through Cholesky factorization
    https://makarandtapaswi.wordpress.com/2011/07/08/cholesky-decomposition-for-matrix-inversion/
    '''
    # Compute GP prior distribution: mean and covariance matrices
    # (eq 2.13, 2.14)
    K = self.covFunc(X, X, 'K')        # prior cov
    m = self.mean * T.ones_like(Y)     # prior mean

    # Compute GP joint prior distribution between training and test (eq 2.18)
    Ks = self.covFunc(X, x_test, 'Ks')
    # Pay attention!! here is the self test cov matrix.
    Kss = self.covFunc(x_test, x_test, 'Kss', mode='self_test')

    # Compute posterior distribution with noise: L, alpha, sW and
    # log_likelihood.
    sn2 = T.exp(2 * self.sigma_n)      # noise variance of likGauss
    L = sT.cholesky(K / sn2 + T.identity_like(K))
    sl = sn2
    L_inv = sT.matrix_inverse(L)
    residual = Y - m
    alpha = T.dot(sT.matrix_inverse(L.T), T.dot(L_inv, residual)) / sl
    sW = T.ones_like(T.sum(K, axis=1)).reshape((K.shape[0], 1)) / T.sqrt(sl)

    # ``X.shape[0]`` is a symbolic integer: dividing it by the integer 2
    # floor-divides under Python 2 (Theano's default int_division), which
    # drops half a term for odd sample counts.  Multiply by 0.5 instead.
    log_likelihood = T.sum(
        -0.5 * (T.dot(residual.T, alpha))
        - T.sum(T.log(T.diag(L)))
        - 0.5 * X.shape[0] * T.log(2. * np.pi * sl))

    # Compute predictive distribution using the computed posterior
    # distribution.
    # NOTE(review): ``m`` has the shape of Y while ``Ks.T . alpha`` has one
    # row per test point; this addition presumably relies on equal sizes or
    # broadcasting -- confirm.
    fmu = m + T.dot(Ks.T, alpha)       # Prediction Mu fs|f, eq 2.25
    V = T.dot(L_inv, T.extra_ops.repeat(sW, x_test.shape[0], axis=1) * Ks)
    # Prediction Sigma, eq 2.26
    fs2 = Kss - (T.sum(V * V, axis=0)).reshape((1, V.shape[1])).T
    fs2 = T.maximum(fs2, 0)            # remove negative variance noise

    y_test_mu = fmu
    y_test_var = fs2 + sn2

    return (K, Ks, Kss, y_test_mu, y_test_var, log_likelihood,
            L, alpha, V, fs2, sW)
def _alignData(self, w_t, sv_tm1):
    """Mask the slot-value vector with the output of ``self._batchAlign``.

    An all-ones mask shaped like ``sv_tm1`` with one extra trailing column
    is scanned together with ``w_t`` through ``self._batchAlign``; the extra
    column is dropped again before the mask is applied to ``sv_tm1``.
    """
    # Pad a dummy element at the end of every row.
    ones_body = T.ones_like(sv_tm1)
    ones_pad = T.ones_like(sv_tm1[:, -1:])
    padded_mask = T.concatenate([ones_body, ones_pad], axis=1)

    # Iterate over the batch.
    aligned_mask, _ = theano.scan(
        fn=self._batchAlign,
        sequences=[w_t, padded_mask],
        outputs_info=None)

    # Mask the slot-value vector (dummy column removed).
    return aligned_mask[:, :-1] * sv_tm1
def _step(m_, x_, h_):
    """One masked recurrent step.

    ``m_`` is the step mask, ``x_`` the pre-computed input projection and
    ``h_`` the previous state.  ``self.U``, ``_slice`` and ``hidden_size``
    come from the enclosing scope.  Where the mask is 0 the previous state
    is carried over unchanged.
    """
    pre = T.dot(h_, self.U) + x_

    update = T.nnet.sigmoid(_slice(pre, 0, hidden_size))
    reset = T.nnet.sigmoid(_slice(pre, 1, hidden_size))

    # The candidate mixes the gated pre-activation with the raw input slice.
    gated = _slice(pre, 2, hidden_size) * reset
    passthrough = (T.ones_like(reset) - reset) * _slice(x_, 2, hidden_size)
    candidate = T.tanh(gated + passthrough)

    new_h = (T.ones_like(update) - update) * candidate + update * h_
    return m_[:, None] * new_h + (1. - m_)[:, None] * h_
def gibbs_iteration(g1, s1, h1, t1, v):
    """Run one update of g, s, h and t given the visible units ``v``.

    ``g1`` and ``s1`` are accepted but not read.  With ``mean_field`` (from
    the enclosing scope) set, the conditional expectations are used, with
    ``g`` replaced by ones when computing ``s`` and ``h`` replaced by ones
    when computing ``t``; otherwise each variable is sampled.

    Returns ``[g2, s2, h2, t2]``.
    """
    if not mean_field:
        g2 = self.sample_g_given_htv(h1, t1, v)
        s2 = self.sample_s_given_ghtv(g2, h1, t1, v)
        h2 = self.sample_h_given_gsv(g2, s2, v)
        t2 = self.sample_t_given_gshv(g2, s2, h2, v)
        return [g2, s2, h2, t2]

    g2 = self.g_given_htv(h1, t1, v)
    s2 = self.s_given_ghtv(T.ones_like(g2), h1, t1, v)
    h2 = self.h_given_gsv(g2, s2, v)
    t2 = self.t_given_gshv(g2, s2, T.ones_like(h2), v)
    return [g2, s2, h2, t2]
def test_gpualloc_input_on_gpu():
    """Check the alloc op used for ``ones_like(shared) + scalar``.

    The expression is compiled with ``mode_without_gpu`` and with
    ``mode_with_gpu``; each graph must contain exactly one alloc node of
    the matching kind, and both functions must return ones plus the scalar.
    """
    def count_nodes(fn, op):
        # Number of nodes in the compiled graph whose op compares equal to op.
        return sum(node.op == op for node in fn.maker.env.toposort())

    a_val = numpy.asarray(numpy.random.rand(4, 5), dtype='float32')
    a = tcn.shared_constructor(a_val)
    b = T.fscalar()

    f = theano.function([b], T.ones_like(a) + b, mode=mode_without_gpu)
    f_gpu = theano.function([b], T.ones_like(a) + b, mode=mode_with_gpu)

    assert count_nodes(f, T.alloc) == 1
    assert count_nodes(f_gpu, B.gpu_alloc) == 1

    expected = numpy.ones(a.get_value(borrow=True).shape) + 9
    assert numpy.allclose(expected, f_gpu(9))
    assert numpy.allclose(f(5), f_gpu(5))
def forward_prop_step(x_t, s_t1_prev, s_t2_prev):
    """One step through the embedding, two GRU layers and the softmax.

    ``E``, ``U``, ``W``, ``b``, ``V`` and ``c`` come from the enclosing
    scope.  Weight indices 0-2 belong to layer 1 and 3-5 to layer 2 (update
    gate, reset gate, candidate).  Returns ``[o_t, s_t1, s_t2]``.
    """
    states = []
    layer_in = E[:, x_t]
    for first, prev in ((0, s_t1_prev), (3, s_t2_prev)):
        i_z, i_r, i_c = first, first + 1, first + 2
        z = T.nnet.hard_sigmoid(
            U[i_z].dot(layer_in) + W[i_z].dot(prev) + b[i_z])
        r = T.nnet.hard_sigmoid(
            U[i_r].dot(layer_in) + W[i_r].dot(prev) + b[i_r])
        cand = T.tanh(
            U[i_c].dot(layer_in) + W[i_c].dot(prev * r) + b[i_c])
        new_state = (T.ones_like(z) - z) * cand + z * prev
        states.append(new_state)
        # The next layer reads the state just computed.
        layer_in = new_state

    s_t1, s_t2 = states
    # softmax returns a one-row matrix; keep only that row
    o_t = T.nnet.softmax(V.dot(s_t2) + c)[0]
    return [o_t, s_t1, s_t2]
def gan_binary_crossentropy(d_out_given_fake_for_gen, d_out_given_fake_for_dis, d_out_given_real):
    """Compute generator and discriminator cross-entropy losses.

    The discriminator is scored against zeros on fake samples and against
    ones on real samples; the generator is scored against ones on fake
    samples.  Each loss is the mean over its elements.

    Returns ``(g_loss, d_loss, d_loss_real, d_loss_fake)``.
    """
    # NOTE(review): binary_crossentropy is called as (labels, outputs) here;
    # verify this matches the argument order of the function in scope.
    def mean_bce(labels, outputs):
        return binary_crossentropy(labels, outputs).mean()

    d_loss_fake = mean_bce(T.zeros_like(d_out_given_fake_for_dis),
                           d_out_given_fake_for_dis)
    d_loss_real = mean_bce(T.ones_like(d_out_given_real),
                           d_out_given_real)
    d_loss = d_loss_real + d_loss_fake

    g_loss = mean_bce(T.ones_like(d_out_given_fake_for_gen),
                      d_out_given_fake_for_gen)
    return g_loss, d_loss, d_loss_real, d_loss_fake
def build_loss(deterministic):
    """Build the generator, discriminator and reconstruction costs.

    ``ldict``, ``nn`` and ``X`` come from the enclosing scope.  Returns
    ``(g_cost, d_cost, mse)``.
    """
    # This currently has the problem that these 3 expressions come from 3
    # different get_output calls, so they won't return the same mask if
    # dropout or other noise is used.  Currently not using dropout so not a
    # problem.
    get_output = nn.layers.get_output
    bce = nn.objectives.binary_crossentropy

    ae = get_output(ldict['ae_out'], deterministic=deterministic)
    disc_real = get_output(ldict['disc_out'], deterministic=deterministic)
    # Discriminator output when its input layer is fed the autoencoder output.
    disc_fake = get_output(ldict['disc_out'],
                           {ldict['disc_in']: ae},
                           deterministic=deterministic)

    d_cost_real = bce(disc_real, T.ones_like(disc_real)).mean()
    d_cost_fake = bce(disc_fake, T.zeros_like(disc_fake)).mean()
    g_cost = bce(disc_fake, T.ones_like(disc_fake)).mean()
    d_cost = d_cost_real + d_cost_fake

    mse = nn.objectives.squared_error(ae, X).mean()
    return g_cost, d_cost, mse
def test_scan_err1(self):
    """Check that a failing scan step is reported from inside ``fx``.

    With ``compute_test_value = 'raise'``, building the scan below is
    expected to raise ``ValueError`` while evaluating ``fx`` on the test
    values, and the traceback must point into ``fx`` in this test file.
    The config value is restored afterwards.
    """
    # This test should fail when building fx for the first time
    orig_compute_test_value = theano.config.compute_test_value
    try:
        theano.config.compute_test_value = 'raise'

        k = T.iscalar("k")
        A = T.matrix("A")
        k.tag.test_value = 3
        A.tag.test_value = numpy.random.rand(5, 3).astype(config.floatX)

        def fx(prior_result, A):
            return T.dot(prior_result, A)

        # Since we have to inspect the traceback,
        # we cannot simply use self.assertRaises()
        try:
            theano.scan(
                fn=fx,
                outputs_info=T.ones_like(A),
                non_sequences=A,
                n_steps=k)
            # Reaching this line means no error was raised; the
            # AssertionError is not caught by the handler below.
            assert False
        except ValueError:
            # Get traceback
            tb = sys.exc_info()[2]
            # Get the frame info of the fifth entry from the end
            frame_info = traceback.extract_tb(tb)[-5]
            # We should be in the "fx" function defined above
            assert os.path.split(frame_info[0])[1] == 'test_compute_test_value.py'
            assert frame_info[2] == 'fx'
    finally:
        theano.config.compute_test_value = orig_compute_test_value
def get_constraint_updates(self):
    """Return an OrderedDict of constraint updates keyed by parameter.

    Applied in order:

    * ``flags['wv_norm']``: 'unit' divides ``Wv`` by ``norm_wv``;
      'max_unit' additionally multiplies by ``min(norm_wv, 1.0)``.
    * ``flags['scalar_lambd']``: replaces ``lambd`` by its mean.
    * ``sparse_gmask``: multiplies ``Wg`` by the transposed mask.
    * ``clip_max`` / ``clip_min``: bound the named parameters from above /
      below.  Clipping is applied on top of any update already registered
      for the parameter, so earlier constraints are not discarded.

    Raises
    ------
    AssertionError
        If a key of ``clip_max`` / ``clip_min`` is not the name of one of
        ``self.params()``.
    """
    constraint_updates = OrderedDict()

    if self.flags['wv_norm'] == 'unit':
        constraint_updates[self.Wv] = self.Wv / self.norm_wv
    elif self.flags['wv_norm'] == 'max_unit':
        constraint_updates[self.Wv] = (
            self.Wv / self.norm_wv * T.minimum(self.norm_wv, 1.0))

    if self.flags['scalar_lambd']:
        constraint_updates[self.lambd] = (
            T.mean(self.lambd) * T.ones_like(self.lambd))

    ## Enforce sparsity pattern on g if required ##
    if self.sparse_gmask:
        constraint_updates[self.Wg] = self.Wg * self.sparse_gmask.mask.T

    ## clip parameters to maximum values (if applicable)
    for (k, v) in self.clip_max.items():
        assert k in [param.name for param in self.params()]
        param = getattr(self, k)
        # The dict is keyed by the shared variable, not by its name, so the
        # pending update must be looked up with ``param``.
        current = constraint_updates.get(param, param)
        constraint_updates[param] = T.minimum(current, v)

    ## clip parameters to minimum values (if applicable)
    for (k, v) in self.clip_min.items():
        assert k in [param.name for param in self.params()]
        param = getattr(self, k)
        current = constraint_updates.get(param, param)
        constraint_updates[param] = T.maximum(current, v)

    return constraint_updates
def compile(self, optimizer, loss, class_mode="categorical", theano_mode=None):
    """Compile the Theano functions used for training, testing and predicting.

    Sets ``self._train``, ``self._train_with_acc``, ``self._predict``,
    ``self._test`` and ``self._test_with_acc``.  ``class_mode`` selects how
    accuracy is computed: "categorical" compares arg-max positions along the
    last axis, "binary" compares against the rounded output.

    Raises
    ------
    Exception
        If ``class_mode`` is neither "categorical" nor "binary".
    """
    self.optimizer = optimizers.get(optimizer)
    self.loss = objectives.get(loss)
    weighted_loss = weighted_objective(objectives.get(loss))

    # input of model
    self.X_train = self.get_input(train=True)
    self.X_test = self.get_input(train=False)

    self.y_train = self.get_output(train=True)
    self.y_test = self.get_output(train=False)

    # target of model, plus the per-sample weights
    self.y = T.zeros_like(self.y_train)
    self.weights = T.ones_like(self.y_train)

    train_loss = weighted_loss(self.y, self.y_train, self.weights)
    test_loss = weighted_loss(self.y, self.y_test, self.weights)

    train_loss.name = 'train_loss'
    test_loss.name = 'test_loss'
    self.y.name = 'y'

    if class_mode == "categorical":
        def accuracy(prediction):
            return T.mean(T.eq(T.argmax(self.y, axis=-1),
                               T.argmax(prediction, axis=-1)))
    elif class_mode == "binary":
        def accuracy(prediction):
            return T.mean(T.eq(self.y, T.round(prediction)))
    else:
        raise Exception("Invalid class mode:" + str(class_mode))
    train_accuracy = accuracy(self.y_train)
    test_accuracy = accuracy(self.y_test)

    self.class_mode = class_mode
    self.theano_mode = theano_mode

    for r in self.regularizers:
        train_loss = r(train_loss)
    updates = self.optimizer.get_updates(
        self.params, self.constraints, train_loss)

    # The model input may be a single variable or a list of variables.
    targets = [self.y, self.weights]
    if type(self.X_train) is list:
        train_ins = self.X_train + targets
        test_ins = self.X_test + targets
        predict_ins = self.X_test
    else:
        train_ins = [self.X_train] + targets
        test_ins = [self.X_test] + targets
        predict_ins = [self.X_test]

    common = dict(allow_input_downcast=True, mode=theano_mode)
    self._train = theano.function(
        train_ins, train_loss, updates=updates, **common)
    self._train_with_acc = theano.function(
        train_ins, [train_loss, train_accuracy], updates=updates, **common)
    self._predict = theano.function(predict_ins, self.y_test, **common)
    self._test = theano.function(test_ins, test_loss, **common)
    self._test_with_acc = theano.function(
        test_ins, [test_loss, test_accuracy], **common)
def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total):
    """Build the conditional mean and (co)variance at ``Xnew``.

    ``cov_total`` / ``mean_total`` are used for the terms involving ``X``
    and ``Xu``; ``self.cov_func`` / ``self.mean_func`` for the terms
    involving ``Xnew``.

    Returns ``(mu, var)`` when ``diag`` is true, otherwise
    ``(mu, stabilize(cov))``.  With ``pred_noise`` set, ``sigma**2`` is
    added to the variance (or to the diagonal of the covariance).
    """
    sigma2 = tt.square(sigma)
    Kuu = cov_total(Xu)
    Kuf = cov_total(Xu, X)
    Luu = cholesky(stabilize(Kuu))
    A = solve_lower(Luu, Kuf)
    Qffd = tt.sum(A * A, 0)

    if self.approx != "FITC":
        # VFE or DTC
        Lamd = tt.ones_like(Qffd) * sigma2
    else:
        Kffd = cov_total(X, diag=True)
        Lamd = tt.clip(Kffd - Qffd, 0.0, np.inf) + sigma2

    A_l = A / Lamd
    L_B = cholesky(tt.eye(Xu.shape[0]) + tt.dot(A_l, tt.transpose(A)))
    r = y - mean_total(X)
    r_l = r / Lamd
    c = solve_lower(L_B, tt.dot(A, r_l))

    Kus = self.cov_func(Xu, Xnew)
    As = solve_lower(Luu, Kus)
    mu = (self.mean_func(Xnew)
          + tt.dot(tt.transpose(As), solve_upper(tt.transpose(L_B), c)))
    C = solve_lower(L_B, As)

    if not diag:
        cov = (self.cov_func(Xnew)
               - tt.dot(tt.transpose(As), As)
               + tt.dot(tt.transpose(C), C))
        if pred_noise:
            cov += sigma2 * tt.identity_like(cov)
        return mu, stabilize(cov)

    Kss = self.cov_func(Xnew, diag=True)
    var = Kss - tt.sum(tt.square(As), 0) + tt.sum(tt.square(C), 0)
    if pred_noise:
        var += sigma2
    return mu, var
def test_scan(self):
    """
    Test the compute_test_value mechanism with Scan.

    The scan output built from variables carrying test values must itself
    carry a ``test_value`` tag.  The config value is restored afterwards.
    """
    saved_setting = theano.config.compute_test_value
    try:
        theano.config.compute_test_value = 'raise'

        k = T.iscalar("k")
        k.tag.test_value = 3
        A = T.vector("A")
        A.tag.test_value = numpy.random.rand(5).astype(config.floatX)

        def fx(prior_result, A):
            return prior_result * A

        # Symbolic description of the result
        result, updates = theano.scan(
            fn=fx,
            outputs_info=T.ones_like(A),
            non_sequences=A,
            n_steps=k)

        # We only care about A**k, but scan has provided us with A**1
        # through A**k.  Keep just the last value.
        final_result = result[-1]
        assert hasattr(final_result.tag, 'test_value')
    finally:
        theano.config.compute_test_value = saved_setting
def one_step(x_t, s_t_prev, U, V, W):
    """One GRU step followed by a softmax output.

    ``U[0..2]`` / ``W[0..2]`` hold the update-gate, reset-gate and candidate
    weights.  Returns ``(o_t[0], s_t)``: the first row of the softmax output
    and the new state.
    """
    update = T.nnet.hard_sigmoid(T.dot(U[0], x_t) + T.dot(s_t_prev, W[0]))
    reset = T.nnet.hard_sigmoid(T.dot(U[1], x_t) + T.dot(s_t_prev, W[1]))
    candidate = T.tanh(T.dot(U[2], x_t) + T.dot((s_t_prev * reset), W[2]))

    # Blend the candidate and the previous state with the update gate.
    keep_new = T.ones_like(update) - update
    s_t = keep_new * candidate + update * s_t_prev

    o_t = T.nnet.softmax(T.dot(V, s_t))
    return o_t[0], s_t
def apply(self, inputs, states, cells, location, scale, alpha, mask=None):
    """Run one step of the attention LSTM.

    The input goes through the first half of the conv stack.  An attention
    MLP, fed with the down-sampled input, the previous ``location`` /
    ``scale`` / ``alpha`` (scaled by 1e-5) and ``states``, predicts a new
    location, scale and alpha.  The cropper extracts a patch, which goes
    through the rest of the conv stack, a fully connected layer and the
    LSTM update.

    Where ``mask`` is given and is 0, the previous states and cells are
    kept.

    Returns ``(next_states, next_cells, location, scale, alpha, patch,
    downn_sampled_inputs, conved_part_1, conved_part_2, pre_lstm)``.
    """
    def slice_last(x, no):
        return x[:, no * self.lstm_dim:(no + 1) * self.lstm_dim]

    tanh = self.children[1].apply
    cropper = self.children[0]

    # inputs shape: B x C x X x Y
    # outputs shape: B x C' x X' x Y'
    conved_part_1 = self.apply_conv(
        inputs,
        conv_layers=self.conv_layers[0:self.num_layers_first_half_of_conv])

    # No grey-scale conversion is applied; the input is used as is.
    gray_scale_inputs = inputs

    # inputs shape: B x 1 x X x Y
    # outputs shape: B x 1 x X' x Y'
    downn_sampled_inputs = self.down_sampler(gray_scale_inputs)

    # shape: B x F
    flat_downn_sampled_inputs = downn_sampled_inputs.flatten(ndim=2)

    # inputs shape: B x F'
    # output columns: 0-1 location, 2 scale, 3 onwards alpha
    mlp_output = self.apply_attention_mlp(
        tensor.concatenate([
            flat_downn_sampled_inputs,
            0.00001 * location,
            0.00001 * scale,
            0.00001 * alpha,
            states
        ], axis=1))
    location = mlp_output[:, 0:2]
    location.name = 'location'
    scale = mlp_output[:, 2:3]
    scale.name = 'scale'
    alpha = mlp_output[:, 3:]
    alpha.name = 'alpha'
    scale2d = tensor.concatenate([scale, scale], axis=1)
    alpha2d = tensor.concatenate([alpha, alpha], axis=1)

    # Affine maps from the MLP outputs (shifted by +1) to cropper inputs.
    # inputs shape: B x C' x X' x Y'
    # outputs shape: B x C' x X'' x Y''
    loc_to_cropper = ((location + tensor.ones_like(location)) *
                      np.array([
                          self.cropper_input_shape[0] * 0.4,
                          self.cropper_input_shape[1] * 0.4
                      ]).astype('float32') +
                      np.array([
                          self.cropper_input_shape[0] * 0.1,
                          self.cropper_input_shape[1] * 0.1
                      ]).astype('float32'))
    scale_to_cropper = ((scale2d + tensor.ones_like(scale2d)) *
                        np.array([
                            (1.1 - self.min_scale[0]) / 2.0,
                            (1.1 - self.min_scale[1]) / 2.0
                        ]).astype('float32') +
                        np.array(self.min_scale).astype('float32'))
    alpha_to_cropper = ((alpha2d + tensor.ones_like(alpha2d)) *
                        np.array(
                            [0.98 / 2.0 + 0.001,
                             0.98 / 2.0 + 0.001]).astype('float32'))

    patch, _, _ = cropper.apply(
        conved_part_1,
        np.array([list(self.cropper_input_shape)]),
        loc_to_cropper,
        scale_to_cropper,
        alpha_to_cropper)
    patch.name = 'patch'

    conved_part_2 = self.apply_conv(
        patch,
        conv_layers=self.conv_layers[self.num_layers_first_half_of_conv:])
    flat_conved_part_2 = conved_part_2.flatten(2)
    pre_lstm = self.apply_fc(flat_conved_part_2)
    pre_lstm = tensor.concatenate(
        [pre_lstm, location, scale, alpha], axis=1)

    transformed_pre_lstm = (tensor.dot(pre_lstm, self.W_pre_lstm)
                            + self.b_pre_lstm)
    activation = tensor.dot(states, self.W_state) + transformed_pre_lstm

    in_gate = tensor.nnet.sigmoid(slice_last(activation, 0))
    forget_gate_input = slice_last(activation, 1)
    # The forget gate pre-activation is shifted by +1.
    forget_gate = tensor.nnet.sigmoid(
        forget_gate_input + tensor.ones_like(forget_gate_input))
    next_cells = (forget_gate * cells +
                  in_gate * tanh(slice_last(activation, 2)))
    out_gate = tensor.nnet.sigmoid(slice_last(activation, 3))
    next_states = out_gate * tanh(next_cells)

    # Test against None explicitly instead of truth-testing a tensor.
    if mask is not None:
        next_states = (mask[:, None] * next_states +
                       (1 - mask[:, None]) * states)
        next_cells = (mask[:, None] * next_cells +
                      (1 - mask[:, None]) * cells)

    return (next_states, next_cells, location, scale, alpha, patch,
            downn_sampled_inputs, conved_part_1, conved_part_2, pre_lstm)
def get_output_for(self, input, **kwargs):
    """Return a tensor shaped like ``input`` filled with ``self.constant``.

    Extra keyword arguments are accepted and ignored.
    """
    ones = T.ones_like(input)
    return ones * self.constant
t_worst * 1000) #------------------------------------------------------------------------- i = T.arange(100) A = theano.shared(np.random.normal(size=(10, 10))) def fn1(seq, acc): return T.dot(acc, A) print "-" * 78 print "Unrolled SCAN:" outputs, updates = unrolled_scan(fn1, name='fn1', sequences=[i], outputs_info=[T.ones_like(A)], unroll=10) f_fn1 = theano.function([], outputs[-1], name='fn1') res = f_fn1() print res.shape print res benchmark(f_fn1) print "-" * 78 print "Normal SCAN:" outputs, updates = theano.scan(fn1, name='fn1', sequences=[i], outputs_info=[T.ones_like(A)]) f_fn1 = theano.function([], outputs[-1], name='fn1')
def __init__(self, rng=None, x_in=None, x_mask=None, x_out=None, \
        p_zi_given_xi=None, \
        p_sip1_given_zi=None, \
        q_zi_given_xi=None, \
        params=None, \
        shared_param_dicts=None):
    """Build the scan-based imputation graph, its costs, and compile helpers.

    Parameters
    ----------
    rng : object with ``randint``
        Used to seed this model's ``RandStream``. ``rng.randint`` is called
        unconditionally, so the ``None`` default cannot actually be used.
    x_in, x_out, x_mask : symbolic variables
        ``x_in`` supplies the batch size (``x_in.shape[0]``) and the known
        values for the initial state ``x0``; ``x_out`` is the target used in
        every step and in the NLL; ``x_mask`` weights known values while
        ``1 - x_mask`` weights the current guess.
    p_zi_given_xi, q_zi_given_xi : objects with ``apply`` and ``mlp_params``
        ``apply`` must return ``(mean, logvar)``. The guide ``q`` is fed the
        concatenation ``[xi_masked, xi_unmasked]`` along axis 1.
    p_sip1_given_zi : object with ``apply`` and ``mlp_params``
        ``apply(zi)`` must return an indexable; item 0 is the step proposal,
        items 1 (and 2) are gate pre-activations for 'layer' / 'lstm' steps.
    params : dict
        Must contain 'x_dim', 'z_dim', 'imp_steps', 'step_type', 'x_type'.
        'x_type' must be 'bernoulli' or 'gaussian'; 'step_type' must be one
        of 'jump', 'add', 'lstm', 'layer'.
    shared_param_dicts : dict or None
        When None, fresh shared variables are created and recorded in a new
        dict; otherwise 'x_null', 'grad_null', 's0', 'obs_logvar' are read
        from the given dict.
    """
    # setup a rng for this GIPair
    self.rng = RandStream(rng.randint(100000))
    # grab the user-provided parameters
    self.params = params
    self.x_dim = self.params['x_dim']
    self.z_dim = self.params['z_dim']
    self.imp_steps = self.params['imp_steps']
    self.step_type = self.params['step_type']
    self.x_type = self.params['x_type']
    assert ((self.x_type == 'bernoulli') or (self.x_type == 'gaussian'))
    self.shared_param_dicts = shared_param_dicts
    # grab handles to the relevant InfNets
    self.p_zi_given_xi = p_zi_given_xi
    self.p_sip1_given_zi = p_sip1_given_zi
    self.q_zi_given_xi = q_zi_given_xi
    # record the symbolic variables that will provide inputs to the
    # computation graph created to describe this MultiStageModel
    self.x_in = x_in
    self.x_out = x_out
    self.x_mask = x_mask
    # noise fed to scan as its sequence; one slice is consumed per step
    self.zi_zmuv = T.tensor3()
    # setup switching variable for changing between sampling/training
    zero_ary = to_fX(np.zeros((1, )))
    self.train_switch = theano.shared(value=zero_ary, name='msm_train_switch')
    self.set_train_switch(1.0)
    if self.shared_param_dicts is None:
        # initialize parameters "owned" by this model
        s0_init = to_fX(np.zeros((self.x_dim, )))
        init_ary = to_fX(np.zeros((self.x_dim, )))
        self.x_null = theano.shared(value=init_ary, name='gpis_xn')
        self.grad_null = theano.shared(value=init_ary, name='gpsi_gn')
        self.s0 = theano.shared(value=s0_init, name='gpsi_s0')
        self.obs_logvar = theano.shared(value=zero_ary, name='gpsi_obs_logvar')
        # tanh squashing keeps the effective log-variance inside (-8, 8)
        self.bounded_logvar = 8.0 * T.tanh(
            (1.0 / 8.0) * self.obs_logvar[0])
        self.shared_param_dicts = {}
        self.shared_param_dicts['x_null'] = self.x_null
        self.shared_param_dicts['grad_null'] = self.grad_null
        self.shared_param_dicts['s0'] = self.s0
        self.shared_param_dicts['obs_logvar'] = self.obs_logvar
    else:
        # grab the parameters required by this model from a given dict
        self.x_null = self.shared_param_dicts['x_null']
        self.grad_null = self.shared_param_dicts['grad_null']
        self.s0 = self.shared_param_dicts['s0']
        self.obs_logvar = self.shared_param_dicts['obs_logvar']
        self.bounded_logvar = 8.0 * T.tanh(
            (1.0 / 8.0) * self.obs_logvar[0])

    ##################################################
    # Setup the iterative imputation loop using scan #
    ##################################################
    self.ones_mask = T.ones_like(self.x_mask)

    def imp_step_func(zi_zmuv, si):
        # One imputation step: scan passes the noise slice first, then the
        # recurrent state si (the only non-None entry of outputs_info).
        si_as_x = self._si_as_x(si)
        xi_unmasked = self.x_out
        xi_masked = (self.x_mask * xi_unmasked) + \
                    ((1.0 - self.x_mask) * si_as_x)
        grad_unmasked = self.x_out - si_as_x
        # NOTE(review): grad_masked is computed but not used below.
        grad_masked = self.x_mask * grad_unmasked
        # get samples of next zi, according to the global policy
        zi_p_mean, zi_p_logvar = self.p_zi_given_xi.apply(xi_masked)
        zi_p = zi_p_mean + (T.exp(0.5 * zi_p_logvar) * zi_zmuv)
        # get samples of next zi, according to the guide policy
        zi_q_mean, zi_q_logvar = self.q_zi_given_xi.apply(
            T.concatenate([xi_masked, xi_unmasked], axis=1))
        zi_q = zi_q_mean + (T.exp(0.5 * zi_q_logvar) * zi_zmuv)
        # make zi samples that can be switched between zi_p and zi_q
        zi = ((self.train_switch[0] * zi_q) + \
              ((1.0 - self.train_switch[0]) * zi_p))
        # compute relevant KLds for this step
        kldi_q2p = gaussian_kld(zi_q_mean, zi_q_logvar, zi_p_mean,
                                zi_p_logvar)  # KL(q || p)
        kldi_p2q = gaussian_kld(zi_p_mean, zi_p_logvar, zi_q_mean,
                                zi_q_logvar)  # KL(p || q)
        kldi_p2g = gaussian_kld(zi_p_mean, zi_p_logvar, 0.0,
                                0.0)  # KL(p || global prior)
        # compute the next si, given the sampled zi
        hydra_out = self.p_sip1_given_zi.apply(zi)
        si_step = hydra_out[0]
        if (self.step_type == 'jump'):
            # jump steps always completely overwrite the current guesses
            sip1 = si_step
        elif (self.step_type == 'add'):
            # add steps just update the guesses additively
            sip1 = si + si_step
        elif (self.step_type == 'lstm'):
            # LSTM-style updates with write and erase gates
            write_gate = 1.1 * T.nnet.sigmoid(1.0 + hydra_out[1])
            erase_gate = 1.1 * T.nnet.sigmoid(1.0 + hydra_out[2])
            sip1 = (erase_gate * si) + (write_gate * si_step)
        elif (self.step_type == 'layer'):
            alpha_gate = T.nnet.sigmoid(hydra_out[1])
            sip1 = (alpha_gate * si) + ((1.0 - alpha_gate) * si_step)
        else:
            assert False, "Unknown step type!"
        # compute NLL for the current imputation
        nlli = self._construct_nll_costs(sip1, self.x_out, self.x_mask)
        return sip1, nlli, kldi_q2p, kldi_p2q, kldi_p2g

    # apply scan op for the sequential imputation loop
    # (adding s0 to a zero matrix broadcasts it to one row per example)
    self.s0_full = T.alloc(0.0, self.x_in.shape[0], self.x_dim) + self.s0
    init_vals = [self.s0_full, None, None, None, None]
    self.scan_results, self.scan_updates = theano.scan(imp_step_func, \
            outputs_info=init_vals, sequences=self.zi_zmuv)
    self.si = self.scan_results[0]
    self.nlli = self.scan_results[1]
    self.kldi_q2p = self.scan_results[2]
    self.kldi_p2q = self.scan_results[3]
    self.kldi_p2g = self.scan_results[4]
    # get the initial imputation state
    self.x0 = (self.x_mask * self.x_in) + \
              ((1.0 - self.x_mask) * self._si_as_x(self.s0_full))

    ######################################################################
    # ALL SYMBOLIC VARS NEEDED FOR THE OBJECTIVE SHOULD NOW BE AVAILABLE #
    ######################################################################

    # shared var learning rate for generator and inferencer
    zero_ary = to_fX(np.zeros((1, )))
    self.lr = theano.shared(value=zero_ary, name='gpsi_lr')
    # shared var momentum parameters for generator and inferencer
    self.mom_1 = theano.shared(value=zero_ary, name='gpsi_mom_1')
    self.mom_2 = theano.shared(value=zero_ary, name='gpsi_mom_2')
    # init parameters for controlling learning dynamics
    self.set_sgd_params()
    # init shared var for weighting nll of data given posterior sample
    self.lam_nll = theano.shared(value=zero_ary, name='gpsi_lam_nll')
    self.set_lam_nll(lam_nll=1.0)
    # init shared var for weighting prior kld against reconstruction
    self.lam_kld_p = theano.shared(value=zero_ary, name='gpsi_lam_kld_p')
    self.lam_kld_q = theano.shared(value=zero_ary, name='gpsi_lam_kld_q')
    self.lam_kld_g = theano.shared(value=zero_ary, name='gpsi_lam_kld_g')
    self.set_lam_kld(lam_kld_p=0.05, lam_kld_q=0.95, lam_kld_g=0.0)
    # init shared var for controlling l2 regularization on params
    self.lam_l2w = theano.shared(value=zero_ary, name='msm_lam_l2w')
    self.set_lam_l2w(1e-5)

    # Grab all of the "optimizable" parameters in "group 1"
    self.joint_params = [self.s0, self.obs_logvar]
    self.joint_params.extend(self.p_zi_given_xi.mlp_params)
    self.joint_params.extend(self.p_sip1_given_zi.mlp_params)
    self.joint_params.extend(self.q_zi_given_xi.mlp_params)

    #################################
    # CONSTRUCT THE KLD-BASED COSTS #
    #################################
    self.kld_p, self.kld_q, self.kld_g = self._construct_kld_costs(p=1.0)
    self.kld_costs = (self.lam_kld_p[0] * self.kld_p) + \
                     (self.lam_kld_q[0] * self.kld_q) + \
                     (self.lam_kld_g[0] * self.kld_g)
    self.kld_cost = T.mean(self.kld_costs)

    #################################
    # CONSTRUCT THE NLL-BASED COSTS #
    #################################
    # only the NLL after the final imputation step enters the cost
    self.nll_costs = self.nlli[-1]
    self.nll_cost = self.lam_nll[0] * T.mean(self.nll_costs)
    self.nll_bounds = self.nll_costs.ravel() + self.kld_q.ravel()
    self.nll_bound = T.mean(self.nll_bounds)

    ########################################
    # CONSTRUCT THE REST OF THE JOINT COST #
    ########################################
    param_reg_cost = self._construct_reg_costs()
    self.reg_cost = self.lam_l2w[0] * param_reg_cost
    self.joint_cost = self.nll_cost + self.kld_cost + self.reg_cost

    ##############################
    # CONSTRUCT A PER-TRIAL COST #
    ##############################
    self.obs_costs = self.nll_costs + self.kld_costs

    # Get the gradient of the joint cost for all optimizable parameters
    print("Computing gradients of self.joint_cost...")
    self.joint_grads = OrderedDict()
    grad_list = T.grad(self.joint_cost, self.joint_params)
    for i, p in enumerate(self.joint_params):
        self.joint_grads[p] = grad_list[i]

    # Construct the updates for the generator and inferencer networks
    self.joint_updates = get_adam_updates(params=self.joint_params, \
            grads=self.joint_grads, alpha=self.lr, \
            beta1=self.mom_1, beta2=self.mom_2, \
            mom2_init=1e-3, smoothing=1e-4, max_grad_norm=10.0)
    # merge in whatever updates scan itself produced
    for k, v in self.scan_updates.items():
        self.joint_updates[k] = v

    # Construct a function for jointly training the generator/inferencer
    print("Compiling cost computer...")
    self.compute_raw_costs = self._construct_raw_costs()
    print("Compiling training function...")
    self.train_joint = self._construct_train_joint()
    print("Compiling free-energy sampler...")
    self.compute_fe_terms = self._construct_compute_fe_terms()
    print("Compiling best step cost computer...")
    self.compute_per_step_cost = self._construct_compute_per_step_cost()
    print("Compiling data-guided imputer sampler...")
    self.sample_imputer = self._construct_sample_imputer()
    # make easy access points for some interesting parameters
    #self.gen_inf_weights = self.p_zi_given_xi.shared_layers[0].W
    return
def sym_gradients_new(self, X):
    """Build the symbolic negative log-likelihood and hand-derived gradients.

    A forward scan accumulates the hidden pre-activations over the rows of
    ``X`` and ``self.W``; a second scan, run backwards, evaluates the
    mixture-of-Gaussians density per step and the gradients with respect to
    every parameter, reconstructing earlier activations by subtraction.

    Parameters
    ----------
    X : theano matrix
        Iterated row-by-row together with ``self.W``.
        NOTE(review): presumably laid out as (dimensions, batch), matching
        the existing "BxH" comments -- confirm against callers.

    Returns
    -------
    tuple
        ``(ps[-1], gradients, updates)`` where ``gradients`` maps "W",
        "b_alpha", "V_alpha", "b_mu", "V_mu", "b_sigma", "V_sigma" and
        "activation_rescaling" to their symbolic gradients, and ``updates``
        merges the update dictionaries of both scans.

    Only the "sigmoid" and "RLU" nonlinearities are supported (asserted).
    """
    non_linearity_name = self.parameters["nonlinearity"].get_name()
    assert (non_linearity_name == "sigmoid" or non_linearity_name == "RLU")
    # First element is different (it is predicted from the bias only)
    init_a = T.zeros_like(T.dot(X.T, self.W))  # BxH
    init_x = T.ones_like(X[0])

    def a_i_given_a_im1(x, w, a_prev, x_prev):
        # Adds the outer product of the *previous* input with the current
        # weight row, then hands the current input on as the next x_prev.
        a = a_prev + T.dot(T.shape_padright(x_prev, 1),
                           T.shape_padleft(w, 1))
        return (a, x)

    ([As, _], updates) = theano.scan(a_i_given_a_im1,
                                     sequences=[X, self.W],
                                     outputs_info=[init_a, init_x])
    top_activations = As[-1]
    # Xs_m1 is X shifted down by one row, with the first row set to 1
    Xs_m1 = T.set_subtensor(X[1:, :], X[0:-1, :])
    Xs_m1 = T.set_subtensor(Xs_m1[0, :], 1)

    # Reconstruct the previous activations and calculate (for that visible dimension) the density and all the gradients
    def density_and_gradients(x_i, x_im1, w_i, V_alpha, b_alpha, V_mu, b_mu,
                              V_sigma, b_sigma, activation_factor, a_i,
                              lp_accum, dP_da_ip1):
        B = T.cast(x_i.shape[0], floatX)
        pot = a_i * activation_factor
        h = self.nonlinearity(pot)  # BxH

        z_alpha = T.dot(h, V_alpha) + T.shape_padleft(b_alpha)
        z_mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)
        z_sigma = T.dot(h, V_sigma) + T.shape_padleft(b_sigma)

        Alpha = T.nnet.softmax(z_alpha)  # BxC
        Mu = z_mu  # BxC
        Sigma = T.exp(z_sigma)  # BxC

        Phi = -constantX(0.5) * T.sqr(
            (Mu - T.shape_padright(x_i, 1)) /
            Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2 * np.pi))
        # clamp from below at -100 before the log-sum-exp
        wPhi = T.maximum(Phi + T.log(Alpha), constantX(-100.0))

        lp_current = -log_sum_exp(wPhi)  # negative log likelihood
        # lp_current_sum = T.sum(lp_current)

        Pi = T.exp(wPhi - T.shape_padright(lp_current, 1))  # #
        dp_dz_alpha = Pi - Alpha  # BxC
        # dp_dz_alpha = T.grad(lp_current_sum, z_alpha)
        gb_alpha = dp_dz_alpha.mean(0, dtype=floatX)  # C
        gV_alpha = T.dot(h.T, dp_dz_alpha) / B  # HxC

        dp_dz_mu = -Pi * (Mu - T.shape_padright(x_i, 1)) / T.sqr(Sigma)
        # dp_dz_mu = T.grad(lp_current_sum, z_mu)
        dp_dz_mu = dp_dz_mu * Sigma  # Heuristic
        gb_mu = dp_dz_mu.mean(0, dtype=floatX)
        gV_mu = T.dot(h.T, dp_dz_mu) / B

        dp_dz_sigma = Pi * (
            T.sqr(T.shape_padright(x_i, 1) - Mu) / T.sqr(Sigma) - 1)
        # dp_dz_sigma = T.grad(lp_current_sum, z_sigma)
        gb_sigma = dp_dz_sigma.mean(0, dtype=floatX)
        gV_sigma = T.dot(h.T, dp_dz_sigma) / B

        dp_dh = T.dot(dp_dz_alpha, V_alpha.T) + T.dot(
            dp_dz_mu, V_mu.T) + T.dot(dp_dz_sigma, V_sigma.T)  # BxH
        if non_linearity_name == "sigmoid":
            dp_dpot = dp_dh * h * (1 - h)
        elif non_linearity_name == "RLU":
            dp_dpot = dp_dh * (pot > 0)

        gfact = (dp_dpot * a_i).sum(1).mean(0, dtype=floatX)  # 1
        dP_da_i = dP_da_ip1 + dp_dpot * activation_factor  # BxH
        gW = T.dot(T.shape_padleft(x_im1, 1), dP_da_i).flatten() / B

        # First output undoes this step's contribution to the activations,
        # yielding the activations for the preceding step.
        return (a_i - T.dot(T.shape_padright(x_im1, 1),
                            T.shape_padleft(w_i, 1)),
                lp_accum + lp_current, dP_da_i, gW, gb_alpha, gV_alpha,
                gb_mu, gV_mu, gb_sigma, gV_sigma, gfact)

    p_accum = T.zeros_like(X[0])
    dP_da_ip1 = T.zeros_like(top_activations)
    ([
        _, ps, _, gW, gb_alpha, gV_alpha, gb_mu, gV_mu, gb_sigma, gV_sigma,
        gfact
    ], updates2) = theano.scan(density_and_gradients,
                               go_backwards=True,
                               sequences=[
                                   X, Xs_m1, self.W, self.V_alpha,
                                   self.b_alpha, self.V_mu, self.b_mu,
                                   self.V_sigma, self.b_sigma,
                                   self.activation_rescaling
                               ],
                               outputs_info=[
                                   top_activations, p_accum, dP_da_ip1, None,
                                   None, None, None, None, None, None, None
                               ])
    # scan with go_backwards returns the matrices in the order they were created, so we have to reverse the order of the rows
    gW = gW[::-1, :]
    gb_alpha = gb_alpha[::-1, :]
    gV_alpha = gV_alpha[::-1, :, :]
    gb_mu = gb_mu[::-1, :]
    gV_mu = gV_mu[::-1, :, :]
    gb_sigma = gb_sigma[::-1, :]
    gV_sigma = gV_sigma[::-1, :, :]
    gfact = gfact[::-1]

    updates.update(updates2)  # Returns None
    return (ps[-1], {
        "W": gW,
        "b_alpha": gb_alpha,
        "V_alpha": gV_alpha,
        "b_mu": gb_mu,
        "V_mu": gV_mu,
        "b_sigma": gb_sigma,
        "V_sigma": gV_sigma,
        "activation_rescaling": gfact
    }, updates)
def _get_gradients_adagrad(self, J):
    """Compute AdaGrad-scaled gradients and the updated squared-gradient sums.

    The returned gradients are only divided by the root of the accumulated
    squares; the caller still has to apply the global learning rate.

    Parameters
    ----------
    J : theano variable
        cost to differentiate

    Returns
    -------
    list of theano variables
        gradients rescaled by the AdaGrad denominator, one per entry of
        ``self.updatable_parameters``
    dict
        parameter name -> accumulated sum of squared gradients including
        the current step
    """
    names = self.updatable_parameters
    grads = T.grad(J, [self.__dict__[name] for name in names])
    grads = [
        debug_print(grad, 'grads_' + name)
        for grad, name in zip(grads, names)
    ]

    updated_squares = dict()
    # Accumulate the squared gradient per parameter and rescale the
    # gradient by the root of that accumulator.
    for idx, name in enumerate(names):
        grad = grads[idx]
        history = self.__dict__['adagrad_matrix_' + name]
        if isinstance(grad, sparse.SparseVariable):
            # Sparse gradients: square structurally, then drop the explicit
            # zeros that squaring near-zero values may have produced.
            squared = debug_print(sparse.structured_pow(grad, 2.),
                                  'pow_' + name)
            squared = sparse.remove0(squared)
            updated_squares[name] = history + squared
            # Restrict the accumulator to the entries that actually carry a
            # gradient; everything else needs no rescaling.
            pattern = sparse.sp_ones_like(squared)
            subset = debug_print(updated_squares[name] * pattern,
                                 'adagrad_squares_subset_' + name)
            root = debug_print(sparse.sqrt(subset), 'adagrad_sqrt_' + name)
            inverse_root = debug_print(sparse.structured_pow(root, -1.),
                                       'adagrad_pow-1_' + name)
            grads[idx] = sparse.mul(grad, inverse_root)
        else:
            squared = debug_print(T.pow(grad, 2.), 'pow_' + name)
            updated_squares[name] = history + squared
            # Where the accumulator is exactly zero, divide by one instead
            # of by sqrt(0).
            accumulated = updated_squares[name]
            denominator = T.switch(T.neq(accumulated, 0.0),
                                   T.sqrt(accumulated),
                                   T.ones_like(accumulated, dtype=floatX))
            grads[idx] = T.mul(grad, 1. / denominator)
        updated_squares[name] = debug_print(updated_squares[name],
                                            'upd_squares_' + name)

    for idx, name in enumerate(names):
        grads[idx] = debug_print(grads[idx], 'grads_updated_' + name)
    return grads, updated_squares
def get_elementwise_objective(qvalues,
                              actions,
                              rewards,
                              is_alive="always",
                              qvalues_target=None,
                              state_values_target=None,
                              n_steps=1,
                              gamma_or_gammas=0.99,
                              crop_last=True,
                              state_values_target_after_end="zeros",
                              consider_reference_constant=True,
                              aggregation_function="deprecated",
                              force_end_at_last_tick=False,
                              return_reference=False,
                              loss_function=squared_error):
    """
    Returns the elementwise loss between predicted and reference Q-values
    according to the n-step Q-learning algorithm

        Qreference(state,action) = reward(state,action) + gamma*reward(state_1,action_1) + ... + gamma^n * max[action_n]( Q(state_n,action_n) )
        loss = loss_function(Qreference, Qvalues) * is_alive

    :param qvalues: [batch,tick,actions] - predicted qvalues
    :param actions: [batch,tick] - committed actions
    :param rewards: [batch,tick] - immediate rewards for taking actions at given time ticks
    :param is_alive: [batch,tick] - whether given session is still active at given tick. Defaults to always active.

    :param qvalues_target: Q-values used when computing reference (e.g. r+gamma*Q(s',a_max)), shape [batch,tick,actions]
        examples:
        (default) If None, uses current Qvalues.
        Older snapshot Qvalues (e.g. from a target network)
    :param state_values_target: state values V(s), used when computing reference (e.g. r+gamma*V(s')), shape [batch,tick]
        examples:
        double q-learning V(s) = Q_old(s,argmax Q_new(s,a))
        expected_value_sarsa V(s) = E_a~pi(a|s) Q(s,a)
        state values from teacher network (knowledge transfer)

        Provide either nothing, or qvalues_target, or state_values_target - never both at once.
    :param n_steps: if an integer is given, uses n-step q-learning algorithm
        If 1 (default), this works exactly as normal q-learning
        If None: propagating rewards throughout the whole sequence of state-action pairs.
    :param gamma_or_gammas: delayed reward discounts: a single value or array[batch,tick] (can broadcast dimensions).
    :param crop_last: if True, zeros-out loss at final tick, if False - computes loss VS Qvalues_after_end
    :param state_values_target_after_end: [batch,1] - symbolic expression for "next best q-values" for last tick
        used when computing reference Q-values only.
        Defaults at T.zeros_like(Q-values[:,0,None,0]). if crop_last=True, simply does not penalize at last tick.
        If you wish to simply ignore the last tick, use defaults and crop output's last tick ( qref[:,:-1] )
    :param consider_reference_constant: whether or not zero-out gradient flow through reference_qvalues
        (True is highly recommended)
    :param aggregation_function: removed; any value other than "deprecated" raises NotImplementedError
    :param force_end_at_last_tick: if True, forces session end at last tick unless ended otherwise
    :param return_reference: if True, returns reference Qvalues.
        If False, returns loss_function(reference_qvalues, action_qvalues) * is_alive
    :param loss_function: loss_function(V_reference,V_predicted). Defaults to (V_reference-V_predicted)**2.
        Use to override squared error with different loss (e.g. Huber or MAE)
    :return: [batch,tick] tensor: reference Q-values if return_reference,
        otherwise the elementwise loss masked by is_alive
    """
    if aggregation_function != "deprecated":
        raise NotImplementedError(
            "aggregation function has beed deprecated and removed. You can now manually compute "
            "any V(s) and pass it as state_state_values_target. By default it's qvalues.max(axis=-1)"
        )

    #set defaults and assert shapes
    if is_alive == 'always':
        is_alive = T.ones_like(rewards)

    assert qvalues_target is None or state_values_target is None, "Please provide only one of (qvalues_target," \
                                                                  "state_values_target) or none of them, not both"
    assert actions.ndim == rewards.ndim == is_alive.ndim == 2, "actions, rewards and is_alive must have shape [batch,time]"
    assert qvalues.ndim == 3, "q-values must have shape [batch,time,n_actions]"
    assert qvalues_target is None or qvalues_target.ndim == 3, "qvalues_target must have shape[batch,time,n_actions]]"
    assert state_values_target is None or state_values_target.ndim == 2, "state values must have shape [batch,time]"

    #unless already given V(s), compute V(s) as Qvalues of best actions.
    #"Not provided" is detected with `is None`, never via truthiness: the
    #truth value of a tensor is not a presence check and may raise.
    if state_values_target is None:
        q_source = qvalues if qvalues_target is None else qvalues_target
        state_values_target = T.max(q_source, axis=-1)

    # get predicted Q-values for committed actions by both current and target networks
    action_qvalues = get_values_for_actions(qvalues, actions)

    # get reference Q-values via Q-learning algorithm
    reference_qvalues = get_n_step_value_reference(
        state_values=state_values_target,
        rewards=rewards,
        is_alive=is_alive,
        n_steps=n_steps,
        gamma_or_gammas=gamma_or_gammas,
        state_values_after_end=state_values_target_after_end,
        end_at_tmax=force_end_at_last_tick,
        crop_last=crop_last,
    )

    if consider_reference_constant:
        # do not pass gradient through reference Qvalues (since they DO depend on Qvalues by default)
        reference_qvalues = consider_constant(reference_qvalues)

    #If asked, make sure loss equals 0 for the last time-tick
    #(reference is overwritten with the prediction there).
    if crop_last:
        reference_qvalues = T.set_subtensor(reference_qvalues[:, -1],
                                            action_qvalues[:, -1])

    if return_reference:
        return reference_qvalues
    else:
        # tensor of elementwise losses, zeroed wherever the session is over
        elwise_squared_error = loss_function(reference_qvalues,
                                             action_qvalues)
        return elwise_squared_error * is_alive
def __init__(self, input, n_in, index, theta=None, W=None, b=None):
    """Build per-race normalized win probabilities (``self.race_prob``).

    ``input`` rows are scored linearly, exponentiated, and divided by the
    sum of exponentials of the race they belong to. ``index`` holds the
    race boundaries: race ``i`` spans rows ``index[i]:index[i + 1]``.

    When ``theta`` is None a zero-initialized shared vector of length
    ``n_in + 1`` is created; ``W`` and ``b`` are views into ``theta``
    unless ``W`` is given, in which case the supplied ``W`` and ``b`` are
    used as-is.
    """
    # input is a minibatch whose unit is a group of races, not a single sample
    n_out = 1
    # For the CL model we do not build one separating plane per class; there
    # is always a single value, namely the probability of each horse winning.
    # W and b are packed into theta to make T.grad convenient.
    if theta is None:
        self.theta = theano.shared(
            value=numpy.zeros(n_in * n_out + n_out,
                              dtype=theano.config.floatX  #dtype='float32'
                              ),
            name='theta',
            borrow=True)
    else:
        self.theta = theta
    _W = self.theta[0:n_in * n_out].reshape((n_in, n_out))
    _b = self.theta[n_in * n_out:n_in * n_out + n_out]

    if W is None:
        self.W = _W
        self.b = _b
    else:
        self.W = W
        self.b = b

    # exp of the linear-regression output; normalizing it per group gives the final value
    _raw_w = T.exp(T.dot(input, self.W) + self.b)

    # compute the sum of exps within each race group
    def cumsum_within_group(_start, _index, _race):
        start_point = _index[_start]
        stop_point = _index[_start + 1]
        return T.sum(_race[start_point:stop_point], dtype='float32')

    # _cumsum is the per-group sum of exps
    _cumsum, _ = theano.scan(cumsum_within_group,
                             sequences=[T.arange(index.shape[0] - 1)],
                             non_sequences=[index, _raw_w])

    # Build a rep(cumsum, times)-style sequence so that a direct division
    # yields each horse's probability.
    # _times holds the number of horses in each race.
    self._times, _ = theano.scan(
        fn=lambda i, index: index[i + 1] - index[i],
        sequences=[T.arange(index.shape[0] - 1)],
        non_sequences=index)

    _raceprobdiv = T.ones_like(_raw_w)

    # Trick: build an equal-length sequence and overwrite its values with
    # T.set_subtensor. scan does not allow outputs whose length differs
    # between steps, so concatenate cannot be used here.
    def change_race_prob_div(_i, _change, _rep, _times, _item):
        _change = T.set_subtensor(
            _change[_rep[_i]:_rep[_i + 1]],
            T.reshape(T.alloc(_item[_i], _times[_i]), (_times[_i], 1)))
        return _change

    # _race_prob_div stores, for every row, the normalizer it must be divided by
    _race_prob_div, _ = theano.scan(
        fn=change_race_prob_div,
        sequences=[T.arange(index.shape[0] - 1)],
        outputs_info=[_raceprobdiv],
        non_sequences=[index, self._times, _cumsum])

    # Normalized probabilities. Computing each horse's probability is the
    # most important part of init; in ordinary logistic regression this
    # needs no label, just a softmax.
    # (the last scan step has every race's slice filled in)
    self.race_prob = _raw_w / _race_prob_div[-1]

    self.mean_neg_loglikelihood = None
    self.neg_log_likelihood = None
    self.pos_log_likelihood = None
    self.r_square = None
    self.r_error = None

    self.params = [self.W, self.b]
def ones_like(x):
    """Return the result of ``T.ones_like`` applied to ``x``."""
    all_ones = T.ones_like(x)
    return all_ones
def test_scan_debugprint5():
    """Compare debugprint of the gradient of a scan against a stored dump.

    Builds ``A ** k`` with ``theano.scan``, differentiates the sum of the
    final result with respect to ``A``, prints the gradient graph to a
    string and compares it with ``expected_output`` line by line, ignoring
    leading and trailing whitespace on each line.
    """
    k = tensor.iscalar("k")
    A = tensor.dvector("A")

    # Symbolic description of the result
    result, updates = theano.scan(fn=lambda prior_result, A: prior_result * A,
                                  outputs_info=tensor.ones_like(A),
                                  non_sequences=A,
                                  n_steps=k)

    final_result = tensor.grad(result[-1].sum(), A)

    output_str = theano.printing.debugprint(final_result, file='str')
    lines = []
    for line in output_str.split('\n'):
        lines += [line]

    # NOTE(review): the comparison loop below splits this literal on "\n",
    # but as it stands the literal contains no line breaks, so it cannot
    # match a multi-line dump. Presumably the newlines (and the column
    # alignment inside each line) were lost -- restore the literal from the
    # upstream test before relying on this check.
    expected_output = """Subtensor{int64} [id A] '' |for{cpu,grad_of_scan_fn}.1 [id B] '' | |Elemwise{sub,no_inplace} [id C] '' | | |Subtensor{int64} [id D] '' | | | |Shape [id E] '' | | | | |for{cpu,scan_fn} [id F] '' | | | | |k [id G] | | | | |IncSubtensor{Set;:int64:} [id H] '' | | | | | |AllocEmpty{dtype='float64'} [id I] '' | | | | | | |Elemwise{add,no_inplace} [id J] '' | | | | | | | |k [id G] | | | | | | | |Subtensor{int64} [id K] '' | | | | | | | |Shape [id L] '' | | | | | | | | |Rebroadcast{0} [id M] '' | | | | | | | | |DimShuffle{x,0} [id N] '' | | | | | | | | |Elemwise{second,no_inplace} [id O] '' | | | | | | | | |A [id P] | | | | | | | | |DimShuffle{x} [id Q] '' | | | | | | | | |TensorConstant{1.0} [id R] | | | | | | | |Constant{0} [id S] | | | | | | |Subtensor{int64} [id T] '' | | | | | | |Shape [id U] '' | | | | | | | |Rebroadcast{0} [id M] '' | | | | | | |Constant{1} [id V] | | | | | |Rebroadcast{0} [id M] '' | | | | | |ScalarFromTensor [id W] '' | | | | | |Subtensor{int64} [id K] '' | | | | |A [id P] | | | |Constant{0} [id X] | | |TensorConstant{1} [id Y] | |Subtensor{:int64:} [id Z] '' | | |Subtensor{::int64} [id BA] '' | | | |Subtensor{:int64:} [id BB] '' | | | | |for{cpu,scan_fn} [id F] '' | | | | |Constant{-1} [id BC] | | | |Constant{-1} [id BD] | | |ScalarFromTensor [id BE] '' | | |Elemwise{sub,no_inplace} [id C] '' | |Subtensor{:int64:} [id BF] '' | | |Subtensor{:int64:} [id BG] '' | | | |Subtensor{::int64} [id BH] '' | | | | |for{cpu,scan_fn} [id F] '' | | | | |Constant{-1} [id BI] | | | |Constant{-1} [id BJ] | | |ScalarFromTensor [id BK] '' | | |Elemwise{sub,no_inplace} [id C] '' | |Subtensor{::int64} [id BL] '' | | |IncSubtensor{Inc;int64::} [id BM] '' | | | |Elemwise{second,no_inplace} [id BN] '' | | | | |for{cpu,scan_fn} [id BO] '' | | | | | |k [id G] | | | | | |IncSubtensor{Set;:int64:} [id H] '' | | | | | |A [id P] | | | | |DimShuffle{x,x} [id BP] '' | | | | |TensorConstant{0.0} [id BQ] | | | |IncSubtensor{Inc;int64} [id BR] '' | | | | |Elemwise{second,no_inplace} [id BS] '' | | | | | |Subtensor{int64::} [id BT] '' | | | | | | |for{cpu,scan_fn} [id BO] '' | | | | | | |Constant{1} [id BU] | | | | | |DimShuffle{x,x} [id BV] '' | | | | | |TensorConstant{0.0} [id BQ] | | | | |Elemwise{second} [id BW] '' | | | | | |Subtensor{int64} [id BX] '' | | | | | | |Subtensor{int64::} [id BT] '' | | | | | | |Constant{-1} [id BY] | | | | | |DimShuffle{x} [id BZ] '' | | | | | |Elemwise{second,no_inplace} [id CA] '' | | | | | |Sum{acc_dtype=float64} [id CB] '' | | | | | | |Subtensor{int64} [id BX] '' | | | | | |TensorConstant{1.0} [id R] | | | | |Constant{-1} [id BY] | | | |Constant{1} [id BU] | | |Constant{-1} [id CC] | |Alloc [id CD] '' | | |TensorConstant{0.0} [id BQ] | | |Elemwise{add,no_inplace} [id CE] '' | | | |Elemwise{sub,no_inplace} [id C] '' | | | |TensorConstant{1} [id Y] | | |Subtensor{int64} [id CF] '' | | |Shape [id CG] '' | | | |A [id P] | | |Constant{0} [id CH] | |A [id P] |Constant{-1} [id CI] Inner graphs of the scan ops: for{cpu,grad_of_scan_fn}.1 [id B] '' >Elemwise{add,no_inplace} [id CJ] '' > |Elemwise{mul} [id CK] '' > | |<TensorType(float64, vector)> [id CL] -> [id BL] > | |A_copy [id CM] -> [id P] > |<TensorType(float64, vector)> [id CN] -> [id BL] >Elemwise{add,no_inplace} [id CO] '' > |Elemwise{mul} [id CP] '' > | |<TensorType(float64, vector)> [id CL] -> [id BL] > | |<TensorType(float64, vector)> [id CQ] -> [id Z] > |<TensorType(float64, vector)> [id CR] -> [id CD] for{cpu,scan_fn} [id F] '' >Elemwise{mul,no_inplace} [id CS] '' > |<TensorType(float64, vector)> [id CT] -> [id H] > |A_copy [id CU] -> [id P] for{cpu,scan_fn} [id F] '' >Elemwise{mul,no_inplace} [id CS] '' for{cpu,scan_fn} [id F] '' >Elemwise{mul,no_inplace} [id CS] '' for{cpu,scan_fn} [id BO] '' >Elemwise{mul,no_inplace} [id CS] '' for{cpu,scan_fn} [id BO] '' >Elemwise{mul,no_inplace} [id CS] ''"""

    # zip stops at the shorter sequence, so a length mismatch between the
    # two dumps is not detected by this loop.
    for truth, out in zip(expected_output.split("\n"), lines):
        assert truth.strip() == out.strip()
def ones_like(x, dtype=None, name=None):
    """Instantiates an all-ones variable with the same shape as x.

    # Arguments
        x: variable whose shape the result takes.
        dtype: dtype forwarded to `T.ones_like`; None leaves the choice
            to `T.ones_like`.
        name: optional name assigned to the returned variable. When None
            the variable's name is left untouched.

    # Returns
        The symbolic all-ones variable.
    """
    ones = T.ones_like(x, dtype=dtype)
    # `name` used to be accepted and silently dropped; honour it so the
    # variable can be identified in graph printouts.
    if name is not None:
        ones.name = name
    return ones
def set_output(self):
    """Set ``self._output`` to the elementwise complement (1 - x) of the previous layer's output."""
    upstream = self._prev_layer
    unit = tensor.ones_like(upstream.output)
    self._output = unit - upstream.output
def ready(self):
    """Build the tagging graph and compile the train / eval functions.

    Pipeline as constructed below: word embeddings, optionally concatenated
    with a character-LSTM summary, optional ``GraphCNNTensor`` layers over
    the up/down neighbour inputs, a word-level LSTM, a bias-free linear
    scoring layer, and a transition-matrix path score computed with
    ``CRFForward``.

    Side effects: stores the symbolic inputs, ``self.layers``,
    ``self.inputs``, ``self.params``, ``self.nll_loss``, ``self.pred`` and
    ``self.l2_sqr`` on the instance.

    Returns
    -------
    (f_train, f_eval)
        ``f_train`` takes all of ``self.inputs`` and returns
        ``[cost, nll_loss]`` while applying the optimizer updates;
        ``f_eval`` takes every input except the trailing ``tag_ids`` and
        returns the predicted sequence.
    """
    args = self.args
    w_emb_layer = self.w_emb_layer
    c_emb_layer = self.c_emb_layer
    r_emb_layers = self.r_emb_layers
    r_matrix_layers = self.r_matrix_layers

    char_dim = self.char_dim = args.char_dim
    char_lstm_dim = self.char_lstm_dim = args.char_lstm_dim
    word_dim = self.word_dim = args.word_dim
    word_lstm_dim = self.word_lstm_dim = args.word_lstm_dim

    # dropout rate lives in a shared variable so it can be changed after compilation
    dropout = self.dropout = theano.shared(
        np.float64(args.dropout).astype(theano.config.floatX))

    word_ids = self.word_ids = T.ivector('word_ids')
    char_ids = self.char_ids = T.imatrix('char_ids')
    char_lens = self.char_lens = T.fvector('char_lens')
    char_masks = self.char_masks = T.imatrix('char_masks')
    up_ids = self.up_ids = T.imatrix('up_ids')
    up_rels = self.up_rels = T.imatrix('up_rels')
    up_id_masks = self.up_id_masks = T.imatrix('up_id_masks')
    down_ids = self.down_ids = T.imatrix('down_ids')
    down_rels = self.down_rels = T.imatrix('down_rels')
    down_id_masks = self.down_id_masks = T.imatrix('down_id_masks')
    tag_ids = self.tag_ids = T.ivector('tag_ids')

    layers = self.layers = [w_emb_layer, c_emb_layer]
    layers.extend(r_emb_layers)
    layers.extend(r_matrix_layers)

    # order matters: it is the positional argument order of f_train / f_eval
    inputs = self.inputs = []
    inputs.append(self.word_ids)
    inputs.append(self.char_ids)
    inputs.append(self.char_lens)
    inputs.append(self.char_masks)
    inputs.append(self.up_ids)
    inputs.append(self.up_rels)
    inputs.append(self.up_id_masks)
    inputs.append(self.down_ids)
    inputs.append(self.down_rels)
    inputs.append(self.down_id_masks)
    inputs.append(self.tag_ids)

    wslices = w_emb_layer.forward(word_ids)
    cslices = c_emb_layer.forward(char_ids.ravel())
    cslices = cslices.reshape(
        (char_ids.shape[0], char_ids.shape[1], char_dim))
    # move the character axis first so the char LSTM iterates over it
    cslices = cslices.dimshuffle(1, 0, 2)

    bv_ur_slicess = []
    bv_dr_slicess = []
    b_ur_slicess = []
    b_dr_slicess = []
    bv_ur_matrixss = []
    bv_dr_matrixss = []
    b_ur_matrixss = []
    b_dr_matrixss = []

    # one set of relation matrices per r_matrix_layer, for up and down relations
    for r_matrix_layer in r_matrix_layers:
        bv_ur_matrixs = r_matrix_layer.forward1(up_rels.ravel())
        bv_dr_matrixs = r_matrix_layer.forward1(down_rels.ravel())
        b_ur_matrixs = r_matrix_layer.forward2(up_rels.ravel())
        b_dr_matrixs = r_matrix_layer.forward2(down_rels.ravel())
        bv_ur_matrixss.append(
            bv_ur_matrixs.reshape(
                (up_rels.shape[0], up_rels.shape[1], word_dim, word_dim)))
        bv_dr_matrixss.append(
            bv_dr_matrixs.reshape((down_rels.shape[0], down_rels.shape[1],
                                   word_dim, word_dim)))
        b_ur_matrixss.append(
            b_ur_matrixs.reshape(
                (up_rels.shape[0], up_rels.shape[1], word_dim, word_dim)))
        b_dr_matrixss.append(
            b_dr_matrixs.reshape((down_rels.shape[0], down_rels.shape[1],
                                  word_dim, word_dim)))

    # one set of relation vectors per r_emb_layer, for up and down relations
    for r_emb_layer in r_emb_layers:
        bv_ur_slices = r_emb_layer.forward(up_rels.ravel())
        bv_dr_slices = r_emb_layer.forward(down_rels.ravel())
        b_ur_slices = r_emb_layer.forward2(up_rels.ravel())
        b_dr_slices = r_emb_layer.forward2(down_rels.ravel())
        bv_ur_slicess.append(
            bv_ur_slices.reshape(
                (up_rels.shape[0], up_rels.shape[1], word_dim)))
        bv_dr_slicess.append(
            bv_dr_slices.reshape(
                (down_rels.shape[0], down_rels.shape[1], word_dim)))
        b_ur_slicess.append(
            b_ur_slices.reshape(
                (up_rels.shape[0], up_rels.shape[1], word_dim)))
        b_dr_slicess.append(
            b_dr_slices.reshape(
                (down_rels.shape[0], down_rels.shape[1], word_dim)))

    char_masks = char_masks.dimshuffle(1, 0)

    prev_output = wslices
    prev_size = word_dim

    if char_dim:
        layers.append(
            LSTM(n_in=char_dim,
                 n_out=char_lstm_dim,
                 direction='bi' if args.char_bidirect else 'si'))
        prev_output_2 = cslices
        # NOTE(review): the dropout result on the next line is discarded --
        # forward_all below is fed `cslices`, not `prev_output_2`, so no
        # dropout reaches the char LSTM input. Confirm whether that is
        # intended before changing it (it alters model behaviour).
        prev_output_2 = apply_dropout(prev_output_2, dropout, v2=True)
        prev_output_2 = layers[-1].forward_all(cslices, char_masks)
        # sum over the leading axis, then divide by the (epsilon-padded)
        # character count: a length-normalized summary per word
        prev_output_2 = T.sum(prev_output_2, axis=0)
        prev_output_2 = prev_output_2 / (1e-6 * T.ones_like(char_lens) +
                                         char_lens).dimshuffle(0, 'x')

        prev_size += char_lstm_dim
        prev_output = T.concatenate([prev_output, prev_output_2], axis=1)

    # NOTE(review): nesting inferred -- placed after the `if char_dim:`
    # block; confirm it was not meant to sit inside it.
    prev_output = apply_dropout(prev_output, dropout)

    if args.conv != 0:
        for ind in range(args.clayer):
            layers.append(GraphCNNTensor(
                n_in=prev_size,
                n_out=prev_size,
            ))
            # the first graph layer has no residual connection
            residual = True
            if ind == 0:
                residual = False
            # index `ind` assumes at least `args.clayer` relation layers exist
            prev_output = layers[-1].forward_all(prev_output,
                                                 up_ids,
                                                 up_id_masks,
                                                 bv_ur_slicess[ind],
                                                 bv_ur_matrixss[ind],
                                                 b_ur_slicess[ind],
                                                 b_ur_matrixss[ind],
                                                 down_ids,
                                                 down_id_masks,
                                                 bv_dr_slicess[ind],
                                                 bv_dr_matrixss[ind],
                                                 b_dr_slicess[ind],
                                                 b_dr_matrixss[ind],
                                                 residual=residual)
            prev_output = apply_dropout(prev_output, dropout)

        # NOTE(review): nesting inferred -- placed inside the `if`, after
        # the loop; confirm against the GraphCNNTensor output width.
        prev_size *= 3

    layers.append(
        LSTM(n_in=prev_size,
             n_out=word_lstm_dim,
             direction='bi' if args.word_bidirect else 'si'))

    # insert a singleton middle axis for the LSTM, then drop it again
    prev_output = prev_output.dimshuffle(0, 'x', 1)
    prev_output = layers[-1].forward_all(prev_output)
    prev_output = prev_output.reshape(
        (prev_output.shape[0], prev_output.shape[-1]))

    prev_size = word_lstm_dim

    layers.append(
        Layer(
            n_in=prev_size,
            n_out=args.classes,
            activation=linear,  #ReLU,
            has_bias=False))

    n_tags = args.classes
    s_len = char_ids.shape[0]
    tags_scores = layers[-1].forward(prev_output)
    # two extra rows/columns: index n_tags is the begin state, n_tags + 1 the end state
    transitions = shared((n_tags + 2, n_tags + 2), 'transitions')
    small = -1000
    b_s = np.array([[small] * n_tags + [0, small]]).astype(np.float32)
    e_s = np.array([[small] * n_tags + [small, 0]]).astype(np.float32)
    # pad scores with the two artificial states, then add begin/end rows
    observations = T.concatenate([tags_scores, small * T.ones((s_len, 2))],
                                 axis=1)
    observations = T.concatenate([b_s, observations, e_s], axis=0)
    # score of the gold path: emission scores plus transition scores
    real_path_score = tags_scores[T.arange(s_len), tag_ids].sum()
    b_id = theano.shared(value=np.array([n_tags], dtype=np.int32))
    e_id = theano.shared(value=np.array([n_tags + 1], dtype=np.int32))
    padded_tags_ids = T.concatenate([b_id, tag_ids, e_id], axis=0)
    pre_ids = T.arange(s_len + 1)
    s_ids = T.arange(s_len + 1) + 1
    real_path_score += transitions[padded_tags_ids[pre_ids],
                                   padded_tags_ids[s_ids]].sum()
    all_paths_scores = CRFForward(observations, transitions)
    self.nll_loss = nll_loss = -(real_path_score - all_paths_scores)
    preds = CRFForward(observations,
                       transitions,
                       viterbi=True,
                       return_alpha=False,
                       return_best_sequence=True)
    # strip the artificial begin / end positions
    self.pred = preds[1:-1]

    self.l2_sqr = None
    params = self.params = [transitions]
    for layer in layers:
        self.params += layer.params
    for p in self.params:
        if self.l2_sqr is None:
            self.l2_sqr = args.l2_reg * T.sum(p**2)
        else:
            self.l2_sqr += args.l2_reg * T.sum(p**2)

    # report only the layers appended in this method (skip the embedding
    # and relation layers placed at the front of the list)
    #for l, i in zip(layers[3:], range(len(layers[3:]))):
    for l, i in zip(
            layers[2 + len(r_emb_layers) + len(r_matrix_layers):],
            range(
                len(layers[2 + len(r_emb_layers) +
                           len(r_matrix_layers):]))):
        say("layer {}: n_in={}\tn_out={}\n".format(i, l.n_in, l.n_out))

    nparams = sum(len(x.get_value(borrow=True).ravel()) \
                  for x in self.params)
    say("total # parameters: {}\n".format(nparams))

    cost = self.nll_loss + self.l2_sqr

    lr_method_name = args.learning
    lr_method_parameters = {}
    lr_method_parameters['lr'] = args.learning_rate
    updates = Optimization(clip=5.0).get_updates(lr_method_name, cost,
                                                 params,
                                                 **lr_method_parameters)

    f_train = theano.function(inputs=self.inputs,
                              outputs=[cost, nll_loss],
                              updates=updates,
                              allow_input_downcast=True)

    # evaluation does not need the gold tags, hence inputs[:-1]
    f_eval = theano.function(inputs=self.inputs[:-1],
                             outputs=self.pred,
                             allow_input_downcast=True)

    return f_train, f_eval
def __theano_build__(self):
    """Build the symbolic graph and compile the Theano functions of the model.

    Two word-index sequences (``x_a`` and ``x_b``) are encoded with the same
    single-layer GRU; the two final hidden states are concatenated and fed to
    a softmax layer. The following compiled functions are attached to
    ``self``: ``monitor``, ``monitor_grad``, ``predict``, ``predict_class``,
    ``ce_error`` and ``sgd_step``.
    """
    E, V, U, W, b, c = self.E, self.V, self.U, self.W, self.b, self.c

    x_a = T.ivector('x_a')
    x_b = T.ivector('x_b')
    y = T.lvector('y')

    def forward_step(x_t, s_t_prev):
        """One GRU step: consume word index ``x_t`` and the previous state."""
        # Word embedding layer
        x_e = E[:, x_t]

        # GRU layer 1. The gate biases belong inside the squashing function
        # (as for the candidate state below); adding them afterwards lets
        # the gates leave the [0, 1] range.
        z_t = T.nnet.hard_sigmoid(U[0].dot(x_e) + W[0].dot(s_t_prev) + b[0])
        r_t = T.nnet.hard_sigmoid(U[1].dot(x_e) + W[1].dot(s_t_prev) + b[1])
        c_t = T.tanh(U[2].dot(x_e) + W[2].dot(s_t_prev * r_t) + b[2])
        s_t = (T.ones_like(z_t) - z_t) * c_t + z_t * s_t_prev

        # directly return the hidden state as intermediate output
        return [s_t]

    # sentence a vector (states); the scan update dictionaries are not used
    a_s, _ = theano.scan(forward_step,
                         sequences=x_a,
                         truncate_gradient=self.bptt_truncate,
                         outputs_info=T.zeros(self.hidden_dim))
    # sentence b vector (states)
    b_s, _ = theano.scan(forward_step,
                         sequences=x_b,
                         truncate_gradient=self.bptt_truncate,
                         outputs_info=T.zeros(self.hidden_dim))

    # for classification using simple strategy: concatenate the last states
    sena = a_s[-1]
    senb = b_s[-1]
    combined_s = T.concatenate([sena, senb], axis=0)

    # softmax class
    o = T.nnet.softmax(V.dot(combined_s) + c)[0]

    # in case the o contains 0 which cause inf
    eps = np.asarray([1.0e-10] * self.label_dim, dtype=theano.config.floatX)
    o = o + eps
    om = o.reshape((1, o.shape[0]))
    prediction = T.argmax(om, axis=1)
    o_error = T.nnet.categorical_crossentropy(om, y)

    # cost
    cost = T.sum(o_error)

    # updates
    updates = sgd_updates_adadelta(norm=0, params=self.params, cost=cost)

    # monitor parameter
    # NOTE(review): the multiplication by ones presumably exists so that the
    # compiled function returns a computed expression instead of the
    # parameter variable itself -- confirm before simplifying.
    mV = V * T.ones_like(V)
    mc = c * T.ones_like(c)
    mU = U * T.ones_like(U)
    mW = W * T.ones_like(W)

    gV = T.grad(cost, V)
    gc = T.grad(cost, c)
    gU = T.grad(cost, U)
    gW = T.grad(cost, W)

    mgV = gV * T.ones_like(gV)
    mgc = gc * T.ones_like(gc)
    mgU = gU * T.ones_like(gU)
    mgW = gW * T.ones_like(gW)

    # Assign functions
    self.monitor = theano.function([x_a, x_b], [sena, senb, mV, mc, mU, mW])
    self.monitor_grad = theano.function([x_a, x_b, y], [mgV, mgc, mgU, mgW])
    self.predict = theano.function([x_a, x_b], om)
    self.predict_class = theano.function([x_a, x_b], prediction)
    self.ce_error = theano.function([x_a, x_b, y], cost)

    # One training step: no outputs, only the parameter updates are applied.
    # (For NaN hunting, pass
    #  mode=NanGuardMode(nan_is_error=True, inf_is_error=True,
    #                    big_is_error=True).)
    self.sgd_step = theano.function([x_a, x_b, y], [], updates=updates)
def custom_objective(y_true, y_pred):
    """Custom objective: ranking hinge loss over bags (videos) of segments.

    Both inputs are flattened and then read as ``nvid`` consecutive bags of
    ``n_seg`` values each (60 x 32, hard-coded), so the loss is only
    meaningful when a batch holds exactly ``nvid * n_seg`` elements.

    For every bag the maximum score, the sum of the scores and the sum of
    squared differences between neighbouring scores are collected. The
    returned value is the mean hinge term between the bag maxima of the two
    classes plus the two regularisers, each weighted by 0.00008.
    """
    y_true = T.flatten(y_true)
    y_pred = T.flatten(y_pred)

    n_seg = 32  # Because we have 32 segments per video.
    nvid = 60
    # Floor division keeps this an int under true division as well; it is
    # used below as a slice bound and as a loop count.
    n_exp = nvid // 2
    Num_d = n_seg * nvid
    reg_weight = 0.00008

    # Each accumulator starts as a dummy vector of ones and grows by one
    # entry per bag; the dummy prefix is sliced away again with [Num_d:].
    # This relies on the flattened inputs having exactly Num_d elements.
    sub_max = T.ones_like(y_pred)         # highest score of each bag
    sub_sum_labels = T.ones_like(y_true)  # label sum of each bag
    sub_sum_l1 = T.ones_like(y_true)      # score sum of each bag
    sub_l2 = T.ones_like(y_true)          # squared neighbour differences

    for ii in range(nvid):
        start, stop = ii * n_seg, ii * n_seg + n_seg

        # For labels: the sum tells normal and abnormal videos apart.
        mm = y_true[start:stop]
        sub_sum_labels = T.concatenate([sub_sum_labels, T.stack(T.sum(mm))])

        # For feature scores.
        Feat_Score = y_pred[start:stop]
        sub_max = T.concatenate([sub_max, T.stack(T.max(Feat_Score))])
        sub_sum_l1 = T.concatenate([sub_sum_l1, T.stack(T.sum(Feat_Score))])

        # Sum of squared differences between neighbouring segment scores
        # (n_seg - 1 terms). Same values as the previous pad-with-ones,
        # shift and slice construction.
        neighbour_diff = Feat_Score[:-1] - Feat_Score[1:]
        sub_l2 = T.concatenate([sub_l2, T.stack(T.sum(T.sqr(neighbour_diff)))])

    # Drop the dummy prefixes.
    sub_score = sub_max[Num_d:]
    F_labels = sub_sum_labels[Num_d:]
    # Per the original author's note, F_labels is 32 for a normal video and
    # 0 for an abnormal one because of the labelling done at the end of
    # "load_dataset_Train_batch".

    # Only the first n_exp bags contribute to the regularisers.
    # NOTE(review): presumably these are the abnormal videos, which would
    # require the loader to place them first in the batch -- confirm.
    sub_sum_l1 = sub_sum_l1[Num_d:]
    sub_sum_l1 = sub_sum_l1[:n_exp]
    sub_l2 = sub_l2[Num_d:]
    sub_l2 = sub_l2[:n_exp]

    indx_nor = theano.tensor.eq(F_labels, n_seg).nonzero()[0]  # normal videos
    indx_abn = theano.tensor.eq(F_labels, 0).nonzero()[0]      # abnormal videos

    n_Nor = n_exp

    Sub_Nor = sub_score[indx_nor]  # Maximum score of each normal video
    Sub_Abn = sub_score[indx_abn]  # Maximum score of each abnormal video

    # Hinge term of every normal video against all abnormal videos.
    z = T.ones_like(y_true)
    for ii in range(n_Nor):
        sub_z = T.maximum(1 - Sub_Abn + Sub_Nor[ii], 0)
        z = T.concatenate([z, T.stack(T.sum(sub_z))])

    z = z[Num_d:]  # drop the dummy prefix
    # Final loss
    z = T.mean(z, axis=-1) + reg_weight * T.sum(sub_sum_l1) \
        + reg_weight * T.sum(sub_l2)

    return z
def __init__(self, environment, rho=0.9, rms_epsilon=0.0001, momentum=0, clip_delta=0, freeze_interval=1000, batch_size=32, network_type=None, update_rule="rmsprop", batch_accumulator="sum", random_state=np.random.RandomState(), double_Q=False, neural_network=NN):
    """Build the Q-network graph and compile its training/evaluation functions.

    Parameters
    ----------
    environment : passed on to ``QNetwork.__init__``.
    rho : decay of the squared-gradient accumulator ('rmsprop' and
        'deepmind_rmsprop' update rules).
    rms_epsilon : constant added to the accumulator before the square root.
    momentum, freeze_interval : stored on the instance; not used here.
    clip_delta : when > 0, the loss is quadratic up to ``clip_delta`` and
        linear beyond it; otherwise the plain squared error is used.
    batch_size : number of transitions per training batch.
    network_type : not used in this constructor.
    update_rule : 'deepmind_rmsprop', 'rmsprop' or 'sgd'.
    batch_accumulator : 'sum' or 'mean' of the per-sample losses.
    random_state : forwarded to ``neural_network``.
        NOTE(review): the default instance is created once, when the
        function is defined, and is therefore shared by every object built
        with the default.
    double_Q : when true, the action at t+1 is selected from values that the
        caller feeds to ``_train`` and evaluated with ``next_q_vals``.
    neural_network : class called with (batch_size, input_dimensions,
        n_actions, random_state); must provide ``_buildDQN``.

    Raises
    ------
    ValueError
        For an observation whose dimension tuple is not of length 1, 2 or 3,
        or for an unknown ``batch_accumulator`` / ``update_rule``.
    """
    QNetwork.__init__(self, environment, batch_size)

    self._rho = rho
    self._rms_epsilon = rms_epsilon
    self._momentum = momentum
    self._clip_delta = clip_delta
    self._freeze_interval = freeze_interval
    self._double_Q = double_Q
    self._random_state = random_state

    self.update_counter = 0

    # One symbolic variable per element of the belief state:
    # T.tensor4 if the observation is a matrix, T.tensor3 if it is a vector,
    # T.matrix if it is a scalar (the extra axes are batch and history).
    symbolic_type_by_rank = {3: T.tensor4, 2: T.tensor3, 1: T.matrix}

    states = []
    next_states = []  # same as states, at t+1
    self.states_shared = []  # shared buffers backing `states`
    self.next_states_shared = []  # shared buffers backing `next_states`

    for i, dim in enumerate(self._input_dimensions):
        # An unsupported rank used to be skipped silently, which left
        # `states` shorter than `states_shared` and failed much later.
        if len(dim) not in symbolic_type_by_rank:
            raise ValueError(
                "Unsupported observation dimensions for element {}: {}".format(i, dim))
        make_variable = symbolic_type_by_rank[len(dim)]
        states.append(make_variable("%s_%s" % ("state", i)))
        next_states.append(make_variable("%s_%s" % ("next_state", i)))

        self.states_shared.append(theano.shared(
            np.zeros((batch_size,) + dim, dtype=theano.config.floatX),
            borrow=False))
        self.next_states_shared.append(theano.shared(
            np.zeros((batch_size,) + dim, dtype=theano.config.floatX),
            borrow=False))

    print("Number of observations per state: {}".format(len(self.states_shared)))
    print("For each observation, historySize + ponctualObs_i.shape: {}".format(self._input_dimensions))

    rewards = T.col('rewards')
    actions = T.icol('actions')
    terminals = T.icol('terminals')
    thediscount = T.scalar(name='thediscount', dtype=theano.config.floatX)
    thelr = T.scalar(name='thelr', dtype=theano.config.floatX)

    Q_net = neural_network(self._batch_size, self._input_dimensions,
                           self._n_actions, self._random_state)
    self.q_vals, self.params, shape_after_conv = Q_net._buildDQN(states)

    print("Number of neurons after spatial and temporal convolution layers: {}".format(shape_after_conv))

    self.next_q_vals, self.next_params, shape_after_conv = Q_net._buildDQN(next_states)
    self._resetQHat()

    self.rewards_shared = theano.shared(
        np.zeros((batch_size, 1), dtype=theano.config.floatX),
        broadcastable=(False, True))
    self.actions_shared = theano.shared(
        np.zeros((batch_size, 1), dtype='int32'),
        broadcastable=(False, True))
    self.terminals_shared = theano.shared(
        np.zeros((batch_size, 1), dtype='int32'),
        broadcastable=(False, True))

    if self._double_Q:
        # Q-values of the *current* network evaluated on the next states.
        givens_next = dict(zip(states, self.next_states_shared))
        self.next_q_vals_current_qnet = theano.function(
            [], self.q_vals, givens=givens_next)

        # Symbolic stand-in that becomes an explicit input of `_train`.
        next_q_curr_qnet = theano.clone(self.next_q_vals)

        argmax_next_q_vals = T.argmax(next_q_curr_qnet, axis=1, keepdims=True)
        max_next_q_vals = self.next_q_vals[
            T.arange(batch_size),
            argmax_next_q_vals.reshape((-1,))].reshape((-1, 1))
    else:
        max_next_q_vals = T.max(self.next_q_vals, axis=1, keepdims=True)

    not_terminals = T.ones_like(terminals) - terminals
    target = rewards + not_terminals * thediscount * max_next_q_vals

    q_val = self.q_vals[T.arange(batch_size),
                        actions.reshape((-1,))].reshape((-1, 1))
    # Note : Strangely (target - q_val) lead to problems with python 3.5,
    # theano 0.8.0rc and floatX=float32...
    diff = - q_val + target

    if self._clip_delta > 0:
        # This loss function implementation is taken from
        # https://github.com/spragunr/deep_q_rl
        # If we simply take the squared clipped diff as our loss,
        # then the gradient will be zero whenever the diff exceeds
        # the clip bounds. To avoid this, we extend the loss
        # linearly past the clip point to keep the gradient constant
        # in that regime.
        #
        # This is equivalent to declaring d loss/d q_vals to be
        # equal to the clipped diff, then backpropagating from
        # there, which is what the DeepMind implementation does.
        quadratic_part = T.minimum(abs(diff), self._clip_delta)
        linear_part = abs(diff) - quadratic_part
        loss_ind = 0.5 * quadratic_part ** 2 + self._clip_delta * linear_part
    else:
        loss_ind = 0.5 * diff ** 2

    if batch_accumulator == 'sum':
        loss = T.sum(loss_ind)
    elif batch_accumulator == 'mean':
        loss = T.mean(loss_ind)
    else:
        raise ValueError("Bad accumulator: {}".format(batch_accumulator))

    # Every symbolic input is fed from its shared buffer.
    givens = {
        rewards: self.rewards_shared,
        actions: self.actions_shared,  # actions not needed!
        terminals: self.terminals_shared,
    }
    givens.update(zip(states, self.states_shared))
    givens.update(zip(next_states, self.next_states_shared))

    gparams = [T.grad(loss, p) for p in self.params]

    updates = []
    if update_rule == 'deepmind_rmsprop':
        updates = deepmind_rmsprop(loss, self.params, gparams, thelr,
                                   self._rho, self._rms_epsilon)
    elif update_rule == 'rmsprop':
        for p, g in zip(self.params, gparams):
            # Running average of the squared gradient, one per parameter.
            acc = theano.shared(p.get_value() * 0.)
            acc_new = self._rho * acc + (1 - self._rho) * g ** 2
            gradient_scaling = T.sqrt(acc_new + self._rms_epsilon)
            g = g / gradient_scaling
            updates.append((acc, acc_new))
            updates.append((p, p - thelr * g))
    elif update_rule == 'sgd':
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - thelr * gparam))
    else:
        raise ValueError("Unrecognized update: {}".format(update_rule))

    train_inputs = [thediscount, thelr]
    if self._double_Q:
        train_inputs.append(next_q_curr_qnet)
    self._train = theano.function(train_inputs,
                                  [loss, loss_ind, self.q_vals],
                                  updates=updates,
                                  givens=givens,
                                  on_unused_input='warn')

    givens2 = dict(zip(states, self.states_shared))
    self._q_vals = theano.function([], self.q_vals, givens=givens2,
                                   on_unused_input='warn')
def __init__(self, We_initial, params):
    """Build the energy network, the inference network and their Theano functions.

    Parameters
    ----------
    We_initial : initial word-embedding matrix; row count is the vocabulary
        size and column count the embedding size.
    params : configuration object. The attributes read here are ``outfile``,
        ``hidden``, ``emb``, ``dropout``, ``lm``, ``l2``, ``l3`` and ``eta``.

    Side effects
    ------------
    Opens ``params.outfile`` for writing (kept open as ``self.textfile``) and
    loads pretrained label language-model weights from ``Label_LM.pickle`` in
    the current working directory.

    Compiled functions attached to ``self``: ``train_g``, ``train_d``,
    ``test_time_turning``, ``test_time1``, ``test_time`` and ``test_time2``.
    """
    n_labels = 25          # output units of both tagging networks
    n_lm = n_labels + 1    # label LM vocabulary: the labels plus one extra symbol

    self.textfile = open(params.outfile, 'w')
    We = theano.shared(We_initial)
    embsize = We_initial.shape[1]
    hidden = params.hidden

    # NOTE: the order of the np.random calls and of the layer constructions
    # below is kept as it was, so a seeded run draws the same initial values.
    start0 = np.random.uniform(-0.02, 0.02, (1, n_lm)).astype('float32')
    end0 = np.zeros((1, n_lm)).astype('float32')
    end0[0, -1] = 1.0
    start = theano.shared(start0)
    end = theano.shared(end0)

    # ---- energy network: BiLSTM followed by a linear local-score layer ----
    l_in_word = lasagne.layers.InputLayer((None, None))
    l_mask_word = lasagne.layers.InputLayer(shape=(None, None))

    if params.emb == 1:
        l_emb_word = lasagne.layers.EmbeddingLayer(
            l_in_word, input_size=We_initial.shape[0],
            output_size=embsize, W=We)
    else:
        l_emb_word = lasagne_embedding_layer_2(l_in_word, embsize, We)

    l_lstm_wordf = lasagne.layers.LSTMLayer(l_emb_word, hidden,
                                            mask_input=l_mask_word)
    l_lstm_wordb = lasagne.layers.LSTMLayer(l_emb_word, hidden,
                                            mask_input=l_mask_word,
                                            backwards=True)

    l_reshapef = lasagne.layers.ReshapeLayer(l_lstm_wordf, (-1, hidden))
    l_reshapeb = lasagne.layers.ReshapeLayer(l_lstm_wordb, (-1, hidden))
    concat2 = lasagne.layers.ConcatLayer([l_reshapef, l_reshapeb])
    l_local = lasagne.layers.DenseLayer(
        concat2, num_units=n_labels,
        nonlinearity=lasagne.nonlinearities.linear)

    # Label-pair energies. The last row is used for the first label of a
    # sequence and the last column for the final label.
    Wyy0 = np.random.uniform(-0.02, 0.02, (n_lm, n_lm)).astype('float32')
    Wyy = theano.shared(Wyy0)

    d_params = lasagne.layers.get_all_params(l_local, trainable=True)
    d_params.append(Wyy)
    self.d_params = d_params

    # ---- inference network: BiLSTM followed by a softmax layer ----
    l_in_word_a = lasagne.layers.InputLayer((None, None))
    l_mask_word_a = lasagne.layers.InputLayer(shape=(None, None))
    l_emb_word_a = lasagne_embedding_layer_2(l_in_word_a, embsize,
                                             l_emb_word.W)
    if params.dropout:
        l_emb_word_a = lasagne.layers.DropoutLayer(l_emb_word_a, p=0.5)

    l_lstm_wordf_a = lasagne.layers.LSTMLayer(l_emb_word_a, hidden,
                                              mask_input=l_mask_word_a)
    l_lstm_wordb_a = lasagne.layers.LSTMLayer(l_emb_word_a, hidden,
                                              mask_input=l_mask_word_a,
                                              backwards=True)

    l_reshapef_a = lasagne.layers.ReshapeLayer(l_lstm_wordf_a, (-1, hidden))
    l_reshapeb_a = lasagne.layers.ReshapeLayer(l_lstm_wordb_a, (-1, hidden))
    concat2_a = lasagne.layers.ConcatLayer([l_reshapef_a, l_reshapeb_a])
    if params.dropout:
        concat2_a = lasagne.layers.DropoutLayer(concat2_a, p=0.5)
    l_local_a = lasagne.layers.DenseLayer(
        concat2_a, num_units=n_labels,
        nonlinearity=lasagne.nonlinearities.softmax)

    a_params = lasagne.layers.get_all_params(l_local_a, trainable=True)
    self.a_params = a_params

    # ---- symbolic inputs ----
    y_in = T.ftensor3()    # label vectors per position (n, L, 25)
    y = T.imatrix()        # label indices
    g = T.imatrix()        # word indices
    gmask = T.fmatrix()    # mask over positions
    length = T.iscalar()   # L, used for the reshapes below

    # Inference-network output, reshaped to (n, L, 25) and masked.
    predy0 = lasagne.layers.get_output(l_local_a, {l_in_word_a: g,
                                                   l_mask_word_a: gmask})
    predy = predy0.reshape((-1, length, n_labels))
    predy = predy * gmask[:, :, None]

    # ---- energy ----
    def inner_function(targets_one_step, mask_one_step, prev_label, tg_energy):
        """Scan step accumulating the label-pair energy.

        :param targets_one_step: [batch_size, t]
        :param mask_one_step: [batch_size]
        :param prev_label: [batch_size, t]
        :param tg_energy: [batch_size]
        :return: [current labels, accumulated energy]
        """
        new_ta_energy = T.dot(prev_label, Wyy[:-1, :-1])
        new_ta_energy = tg_energy + T.sum(new_ta_energy * targets_one_step,
                                          axis=1)
        # Masked steps must leave the energy untouched. The masked value was
        # computed before as well, but the unmasked one was returned.
        tg_energy_t = T.switch(mask_one_step, new_ta_energy, tg_energy)
        return [targets_one_step, tg_energy_t]

    local_energy = lasagne.layers.get_output(l_local, {l_in_word: g,
                                                       l_mask_word: gmask})
    local_energy = local_energy.reshape((-1, length, n_labels))
    local_energy = local_energy * gmask[:, :, None]

    # scan requires the iterable dimension to be first
    masks_shuffled = gmask.dimshuffle(1, 0)

    # Index of the last unmasked position of every sequence.
    length_index = T.sum(gmask, axis=1) - 1
    length_index = T.cast(length_index, 'int32')

    # ---- label language model (weights loaded from disk below) ----
    l_LM_in = lasagne.layers.InputLayer((None, None, n_lm))
    l_LM_mask = lasagne.layers.InputLayer(shape=(None, None))
    l_LM_lstm = lasagne.layers.LSTMLayer(l_LM_in, 2 * hidden,
                                         mask_input=l_LM_mask)
    l_reshape_LM = lasagne.layers.ReshapeLayer(l_LM_lstm, (-1, 2 * hidden))
    l_LM = lasagne.layers.DenseLayer(
        l_reshape_LM, num_units=n_lm,
        nonlinearity=lasagne.nonlinearities.softmax)

    LM_params = lasagne.layers.get_all_params(l_LM, trainable=True)
    LM_params.append(start)

    # Pickles are binary data: open in 'rb' and let the context manager
    # close the file even if loading fails.
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use a Label_LM.pickle from a trusted source.
    with open('Label_LM.pickle', 'rb') as lm_file:
        data = pickle.load(lm_file)
    for idx, p in enumerate(LM_params):
        p.set_value(data[idx])

    def sequence_energy(labels):
        """First-label + label-pair + local + last-label energy of `labels`."""
        labels_by_time = labels.dimshuffle(1, 0, 2)
        first_label = labels_by_time[0]
        first_energy = T.dot(first_label, Wyy[-1, :-1])
        [_, pair_energies], _ = theano.scan(
            fn=inner_function,
            outputs_info=[first_label, first_energy],
            sequences=[labels_by_time[1:], masks_shuffled[1:]])
        last_label = labels[T.arange(length_index.shape[0]), length_index]
        local_term = T.sum(T.sum(local_energy * labels, axis=2) * gmask,
                           axis=1)
        return pair_energies[-1] + local_term + T.dot(last_label,
                                                      Wyy[:-1, -1])

    def label_lm_cost(labels):
        """Masked sum of log label-LM probabilities assigned to `labels`."""
        # Widen the label vectors from 25 to 26 entries with a zero column.
        zero_column = T.zeros_like(
            labels[:, :, 0].reshape((-1, length, 1)))
        labels_lm = T.concatenate([labels, zero_column], axis=2)
        ones = T.ones_like(labels_lm[:, 0, :].reshape((-1, 1, n_lm)))
        sos = ones * (start.dimshuffle('x', 0, 1))
        eos = ones * (end.dimshuffle('x', 0, 1))
        lm_in = T.concatenate([sos, labels_lm], axis=1)
        lm_target = T.concatenate([labels_lm, eos], axis=1)
        # The prepended start step is always unmasked.
        lm_mask = T.concatenate([ones[:, 0, 0].reshape((-1, 1)), gmask],
                                axis=1)
        lm_out = lasagne.layers.get_output(l_LM, {l_LM_in: lm_in,
                                                  l_LM_mask: lm_mask})
        lm_out = lm_out.reshape((-1, length + 1, n_lm))
        return T.sum(
            T.log(T.sum(lm_out[:, :-1, :] * lm_target[:, :-1, :], axis=2)
                  + eps) * gmask,
            axis=1)

    # Ground-truth labels: energy plus weighted LM term.
    pos_LM_cost = label_lm_cost(y_in)
    pos_cost = sequence_energy(y_in) + params.lm * pos_LM_cost

    check = T.sum(T.sum(local_energy * y_in, axis=2) * gmask, axis=1)

    # Predicted labels: energy plus weighted LM term.
    neg_LM_cost = label_lm_cost(predy)
    neg_cost = sequence_energy(predy) + params.lm * neg_LM_cost

    y_f = y.flatten()
    predy_f = predy.reshape((-1, n_labels))

    ce_hinge = lasagne.objectives.categorical_crossentropy(predy_f + eps, y_f)
    ce_hinge = ce_hinge.reshape((-1, length))
    ce_hinge = T.sum(ce_hinge * gmask, axis=1)

    # Entropy of the predictions; only needed by the alternative regulariser
    # noted below and not part of any compiled function at the moment.
    entropy_term = -T.sum(predy_f * T.log(predy_f + eps), axis=1)
    entropy_term = entropy_term.reshape((-1, length))
    entropy_term = T.sum(entropy_term * gmask, axis=1)

    delta0 = T.sum(abs((y_in - predy)), axis=2) * gmask
    delta0 = T.sum(delta0, axis=1)

    hinge_cost = delta0 + neg_cost - pos_cost
    hinge_cost = hinge_cost * T.gt(hinge_cost, 0)

    d_cost0 = T.mean(hinge_cost)

    # Select the regulariser of the inference network (alternative: replace
    # the cross-entropy term by `- <weight> * T.mean(entropy_term)`).
    g_cost = -d_cost0 + params.l2 * sum(
        lasagne.regularization.l2(x) for x in a_params) \
        + params.l3 * T.mean(ce_hinge)
    d_cost = d_cost0 + params.l2 * sum(
        lasagne.regularization.l2(x) for x in d_params)

    updates_g = lasagne.updates.sgd(g_cost, a_params, params.eta)
    updates_g = lasagne.updates.apply_momentum(updates_g, a_params,
                                               momentum=0.9)
    self.train_g = theano.function(
        [g, gmask, y, y_in, length],
        [g_cost, d_cost0, pos_cost, neg_cost, delta0, check],
        updates=updates_g, on_unused_input='ignore')

    updates_d = lasagne.updates.adam(d_cost, d_params, 0.001)
    self.train_d = theano.function(
        [g, gmask, y, y_in, length],
        [d_cost, d_cost0, pos_cost, neg_cost, delta0, check],
        updates=updates_d, on_unused_input='ignore')

    # ---- test the model and retune the inference network ----
    predy_test = lasagne.layers.get_output(l_local_a, {l_in_word_a: g,
                                                       l_mask_word_a: gmask},
                                           deterministic=True)
    predy_test = predy_test.reshape((-1, length, n_labels))
    pred = T.argmax(predy_test, axis=2)
    pg = T.eq(pred, y)
    pg = pg * gmask
    acc = 1.0 * T.sum(pg) / T.sum(gmask)

    neg_cost_test = sequence_energy(predy_test)

    test_cost = -T.mean(neg_cost_test) + params.l3 * T.mean(ce_hinge) \
        - params.lm * T.mean(neg_LM_cost)
    test_updates = lasagne.updates.sgd(test_cost, a_params, params.eta)
    test_updates = lasagne.updates.apply_momentum(test_updates, a_params,
                                                  momentum=0.9)
    self.test_time_turning = theano.function(
        [g, gmask, y, length], test_cost,
        updates=test_updates, on_unused_input='ignore')

    self.test_time1 = theano.function(
        [g, gmask, y, y_in, length],
        [acc, T.mean(neg_cost), T.mean(pos_cost),
         params.l3 * T.mean(ce_hinge)],
        on_unused_input='ignore')
    self.test_time = theano.function([g, gmask, y, length], acc)
    self.test_time2 = theano.function([g, gmask, length], pred)
def _get_cost(
        self,
        output,
        truth,
        S,
        B,
        C,
        rescore=False,
        lmbda_coord=5.,
        lmbda_noobj=0.5,
        lmbda_obj=1.,
        min_overlap=1e-5,
        use_overlap=False
    ):
    '''
    Calculates cost for multiple objects in a scene without for loops or scan
    (so reduces the amount of variable created in the theano computation
    graph). A cell is associated with a certain object if the iou of that cell
    and the object is higher than any other ground truth object, and the rest
    of the objectness scores are pushed towards zero.

    Returns the cost and list of variable that I don't want to backpropagate
    through.

    Params:
    ------
    output: network output, indexed as (batch, channel, row, col). Channels
        hold B groups of (x, y, w, h, confidence); the last C channels are
        the class scores.
    truth: 2-D ground truth, one row per image, made of consecutive groups of
        (x, y, w, h) followed by C class entries.
    S: pair giving the grid size; S[0] divides the height, S[1] the width.
    B: number of boxes predicted per cell.
    C: number of classes.
    rescore: if False the confidence target is 1 instead of the iou.
    lmbda_coord, lmbda_noobj, lmbda_obj: weights of the coordinate, the
        no-object confidence and the class terms.
    min_overlap: a cell counts as overlapping a ground truth box only above
        this score.
    use_overlap:
        Yolo, as described in the original paper, assigns a ground truth label
        if the ground truth box overlaps at all with the cell. I've found
        that the result is that with new images with many smaller objects
        because several objects might be overlap a single cell, this causes a
        sort of average bounding box which looks pretty bad. So by using
        overlap, you don't assign a cell to a ground truth label unless it
        overlaps by some semi-significant amount.
    '''

    # calculate height/width of individual cell
    block_height, block_width = 1. / S[0], 1./ S[1]

    # get the offset of each cell
    offset_x, offset_y = meshgrid2D(T.arange(0,1,block_width), T.arange(0,1,block_height))

    # get indices for x,y,w,h,object-ness for easy access
    x_idx, y_idx = T.arange(0,5*B,5), T.arange(1,5*B, 5)
    w_idx, h_idx = T.arange(2,5*B,5), T.arange(3,5*B,5)
    conf_idx = T.arange(4,5*B,5)

    # Get position predictions with offsets. The inserted axis 1 is the one
    # that is broadcast against the ground truth objects.
    pred_x = (output[:,x_idx] + offset_x.dimshuffle('x','x',0,1)).dimshuffle(0,'x',1,2,3)
    pred_y = (output[:,y_idx] + offset_y.dimshuffle('x','x',0,1)).dimshuffle(0,'x',1,2,3)
    pred_w = output[:,w_idx].dimshuffle(0,'x',1,2,3)
    pred_h = output[:,h_idx].dimshuffle(0,'x',1,2,3)
    pred_conf = output[:,conf_idx].dimshuffle(0,'x',1,2,3)
    pred_class = output[:,-C:].dimshuffle(0,'x',1,2,3)

    # Column indices of every object's fields inside a row of `truth`.
    x_idx, y_idx = T.arange(0,truth.shape[1],4+C), T.arange(1,truth.shape[1],4+C)
    w_idx, h_idx = T.arange(2,truth.shape[1],4+C), T.arange(3,truth.shape[1],4+C)
    class_idx,_ = theano.scan(
        lambda x: T.arange(x,x+C,1),
        sequences = T.arange(4,truth.shape[1],4+C)
    )

    truth_x, truth_y = truth[:,x_idx], truth[:,y_idx]
    truth_w, truth_h = truth[:,w_idx], truth[:,h_idx]
    truth_class = truth[:, class_idx]

    # Ground truth expanded so it broadcasts against the predictions.
    truth_x5 = truth_x.dimshuffle(0,1,'x','x','x')
    truth_y5 = truth_y.dimshuffle(0,1,'x','x','x')
    truth_w5 = truth_w.dimshuffle(0,1,'x','x','x')
    truth_h5 = truth_h.dimshuffle(0,1,'x','x','x')

    # Get intersection region bounding box coordinates
    xi = T.maximum(pred_x, truth_x5)
    xf = T.minimum(pred_x + pred_w, (truth_x + truth_w).dimshuffle(0,1,'x','x','x'))
    yi = T.maximum(pred_y, truth_y5)
    yf = T.minimum(pred_y + pred_h, (truth_y + truth_h).dimshuffle(0,1,'x','x','x'))
    w, h = T.maximum(xf - xi, 0.), T.maximum(yf - yi, 0.)

    # Calculate iou score for predicted boxes and truth
    isec = w * h
    union = (pred_w * pred_h) + (truth_w * truth_h).dimshuffle(0,1,'x','x','x') - isec
    iou = T.maximum(isec/union, 0.)

    # Squared error, used to pick a box where the iou score is 0. It has to
    # cover all four coordinates; the width term used to be missing and the
    # height term was counted twice.
    squared_error = (pred_x - truth_x5)**2 + (pred_y - truth_y5)**2 + \
        (pred_w - truth_w5)**2 + (pred_h - truth_h5)**2

    # Get index matrix representing max along the 1st dimension for the iou score (reps 'responsible' box).
    maxval_idx, _ = meshgrid2D(T.arange(B), T.arange(truth.shape[0]))
    maxval_idx = maxval_idx.dimshuffle(0,'x',1,'x','x')
    maxval_idx = T.repeat(T.repeat(maxval_idx,S[0],3),S[1],4)

    # determine which box is responsible by giving box with highest iou score (if iou > 0) or smallest squared error.
    greater_iou = T.eq(maxval_idx, iou.argmax(axis=2).dimshuffle(0,1,'x',2,3))
    smaller_se = T.eq(maxval_idx, squared_error.argmin(axis=2).dimshuffle(0,1,'x',2,3))
    box_is_resp = T.switch(iou.max(axis=2, keepdims=True) > 0, greater_iou, smaller_se)

    # Get matrix for the width/height of each cell
    width, height = T.ones(S) / S[1], T.ones(S) / S[0]
    width, height = width.dimshuffle('x','x',0,1), height.dimshuffle('x','x',0,1)
    offset_x, offset_y = offset_x.dimshuffle('x','x',0,1), offset_y.dimshuffle('x','x',0,1)

    # Get bounding box for intersection between CELL and ground truth box.
    xi = T.maximum(offset_x, truth_x.dimshuffle(0,1,'x','x'))
    xf = T.minimum(offset_x + width, (truth_x + truth_w).dimshuffle(0,1,'x','x'))
    yi = T.maximum(offset_y, truth_y.dimshuffle(0,1,'x','x'))
    yf = T.minimum(offset_y + height, (truth_y + truth_h).dimshuffle(0,1,'x','x'))
    w, h = T.maximum(xf - xi, 0.), T.maximum(yf - yi, 0.)

    # Calculate iou score for the cell.
    isec = w * h
    if not use_overlap:
        union = (width * height) + (truth_w* truth_h).dimshuffle(0,1,'x','x') - isec
        iou_cell = T.maximum(isec/union, 0.).dimshuffle(0,1,'x',2,3)
    else:
        # fraction of the cell covered by the ground truth box
        iou_cell = T.maximum(isec / (width * height), 0.).dimshuffle(0,1,'x',2,3)

    # For every cell keep only the ground truth object with the best score.
    maxval_idx, _ = meshgrid2D(T.arange(iou_cell.shape[1]), T.arange(iou_cell.shape[0]))
    maxval_idx = maxval_idx.dimshuffle(0,1,'x','x','x')
    maxval_idx = T.repeat(T.repeat(T.repeat(maxval_idx, B, 2), S[0], 3), S[1], 4)

    obj_for_cell = T.eq(maxval_idx, iou_cell.argmax(axis=1).dimshuffle(0,'x',1,2,3))

    # Get logical matrix representing minimum iou score for cell to be considered overlapping ground truth.
    cell_intersects = (iou_cell > min_overlap)

    obj_in_cell_and_resp = T.bitwise_and(T.bitwise_and(cell_intersects, box_is_resp), obj_for_cell)
    conf_is_zero = T.bitwise_and(
        bitwise_not(T.bitwise_and(cell_intersects, box_is_resp)),
        obj_for_cell
    )
    conf_is_zero = conf_is_zero.sum(axis=1, keepdims=True)

    # repeat the class predictions once per ground truth object.
    pred_class = T.repeat(pred_class, truth.shape[1] // (4 + C), axis=1)

    # repeat the ground truth for class probabilities for each cell.
    truth_class_rep = T.repeat(T.repeat(truth_class.dimshuffle(0,1,2,'x','x'), S[0], axis=3), S[1], axis=4)

    # repeat "cell overlaps" logical matrix for the number of classes.
    cell_intersects = T.repeat(cell_intersects, C, axis=2)

    if not rescore:
        iou = T.ones_like(iou)

    resp = obj_in_cell_and_resp.nonzero()
    cost = T.sum((pred_conf - iou)[resp]**2) + \
        lmbda_noobj * T.sum((pred_conf[conf_is_zero.nonzero()])**2) + \
        lmbda_coord * T.sum((pred_x - truth_x5)[resp]**2) + \
        lmbda_coord * T.sum((pred_y - truth_y5)[resp]**2) + \
        lmbda_coord * T.sum((safe_sqrt(pred_w) - safe_sqrt(truth_w5))[resp]**2) + \
        lmbda_coord * T.sum((safe_sqrt(pred_h) - safe_sqrt(truth_h5))[resp]**2) + \
        lmbda_obj * T.sum(((pred_class - truth_class_rep)[cell_intersects.nonzero()])**2)

    # average over the images of the batch
    cost /= T.maximum(1., truth.shape[0])
    return cost, [iou]
def __init__(self, model, n_in, n_out, state_bounds, action_bounds,
             reward_bound, settings_):
    """Load the expert policies and build the symbolic training graph.

    All arguments are forwarded unchanged to the parent constructor. The
    method then:

    1. unpickles one expert policy per entry of the ``expert_policy_files``
       setting (consecutive identical paths reuse the loaded object),
    2. creates the actor replay buffers and the symbolic / shared helper
       variables,
    3. builds critic and actor outputs for the model and for a deep-copied
       target model, the TD target, losses and regularisation terms,
    4. builds the optimizer updates for critic and actor, and
    5. calls ``Distillation.compile(self)``.

    Exits the process with status -1 when the ``optimizer`` setting is not
    one of ``rmsprop``, ``momentum``, ``adam`` or ``adagrad``.

    NOTE(review): attributes such as ``_model``, ``_batch_size``,
    ``_learning_rate``, ``_rho``, ``_rms_epsilon``, ``_discount_factor``,
    ``_regularization_weight`` and ``_action_length`` are read here but not
    assigned; presumably the parent constructor sets them -- confirm.
    """
    super(Distillation, self).__init__(model, n_in, n_out, state_bounds,
                                       action_bounds, reward_bound, settings_)

    ### Load expert policy files
    # NOTE(security): dill.load can execute arbitrary code while unpickling;
    # only point 'expert_policy_files' at trusted locations.
    self._expert_policies = []
    previous_file_name = ""
    for policy_dir in self.getSettings()['expert_policy_files']:
        file_name = (policy_dir + '/' + self.getSettings()['model_type'] +
                     '/' + getAgentName() + '.pkl')
        if file_name != previous_file_name:
            print("Loading pre compiled network: ", file_name)
            # 'with' guarantees the handle is closed even if loading fails.
            with open(file_name, 'rb') as f:
                model_ = dill.load(f)
        # When the path repeats, the previously loaded object is shared
        # instead of being loaded again (saves memory).
        self._expert_policies.append(model_)
        previous_file_name = file_name

    self._actor_buffer_states = []
    self._actor_buffer_result_states = []
    self._actor_buffer_actions = []
    self._actor_buffer_rewards = []
    self._actor_buffer_falls = []
    self._actor_buffer_diff = []

    ## because float64 <= float32 * int32, need to use int16 or int8
    self._NotFallen = T.bcol("Not_Fallen")
    self._NotFallen.tag.test_value = np.zeros((self._batch_size, 1),
                                              dtype=np.dtype('int8'))
    self._NotFallen_shared = theano.shared(
        np.zeros((self._batch_size, 1), dtype='int8'),
        broadcastable=(False, True))

    # Per-sample weight multiplied into the actor loss below; fed from the
    # shared variable through self._actGivens.
    self._tmp_diff = T.col("Tmp_Diff")
    self._tmp_diff.tag.test_value = np.zeros(
        (self._batch_size, 1),
        dtype=np.dtype(self.getSettings()['float_type']))
    self._tmp_diff_shared = theano.shared(
        np.zeros((self._batch_size, 1),
                 dtype=self.getSettings()['float_type']),
        broadcastable=(False, True))

    self._critic_regularization_weight = self.getSettings(
    )["critic_regularization_weight"]
    self._critic_learning_rate = self.getSettings()["critic_learning_rate"]

    ## Target network: an independent deep copy of the model passed in.
    self._modelTarget = copy.deepcopy(model)

    # Symbolic inputs shared by all network outputs below.
    state_var = self._model.getStateSymbolicVariable()
    result_state_var = self._model.getResultStateSymbolicVariable()

    # Critic outputs ("_drop" variants are built with deterministic=False).
    self._q_valsA = lasagne.layers.get_output(
        self._model.getCriticNetwork(), state_var, deterministic=True)
    self._q_valsA_drop = lasagne.layers.get_output(
        self._model.getCriticNetwork(), state_var, deterministic=False)
    self._q_valsNextState = lasagne.layers.get_output(
        self._model.getCriticNetwork(), result_state_var,
        deterministic=True)
    self._q_valsTargetNextState = lasagne.layers.get_output(
        self._modelTarget.getCriticNetwork(), result_state_var,
        deterministic=True)
    self._q_valsTarget = lasagne.layers.get_output(
        self._modelTarget.getCriticNetwork(), state_var,
        deterministic=True)
    self._q_valsTarget_drop = lasagne.layers.get_output(
        self._modelTarget.getCriticNetwork(), state_var,
        deterministic=False)

    # Actor outputs.
    self._q_valsActA = lasagne.layers.get_output(
        self._model.getActorNetwork(), state_var, deterministic=True)
    self._q_valsActTarget = lasagne.layers.get_output(
        self._modelTarget.getActorNetwork(), state_var, deterministic=True)
    self._q_valsActA_drop = lasagne.layers.get_output(
        self._model.getActorNetwork(), state_var, deterministic=False)

    self._q_func = self._q_valsA
    self._q_funcTarget = self._q_valsTarget
    self._q_func_drop = self._q_valsA_drop
    self._q_funcTarget_drop = self._q_valsTarget_drop
    self._q_funcAct = self._q_valsActA
    self._q_funcAct_drop = self._q_valsActA_drop

    # TD target: reward + discount * target-critic value of the next state.
    # The _NotFallen mask is intentionally not applied in this version.
    self._target = self._model.getRewardSymbolicVariable() + (
        self._discount_factor * self._q_valsTargetNextState)
    self._diff = self._target - self._q_func
    self._diff_drop = self._target - self._q_func_drop
    loss = T.pow(self._diff, 2)
    self._loss = T.mean(loss)
    self._loss_drop = T.mean(0.5 * self._diff_drop**2)

    self._params = lasagne.layers.helper.get_all_params(
        self._model.getCriticNetwork())
    self._actionParams = lasagne.layers.helper.get_all_params(
        self._model.getActorNetwork())

    # Substitutions used when compiling the critic functions.
    self._givens_ = {
        self._model.getStateSymbolicVariable():
        self._model.getStates(),
        self._model.getResultStateSymbolicVariable():
        self._model.getResultStates(),
        self._model.getRewardSymbolicVariable():
        self._model.getRewards(),
    }
    # Substitutions used when compiling the actor functions.
    self._actGivens = {
        self._model.getStateSymbolicVariable(): self._model.getStates(),
        self._model.getActionSymbolicVariable(): self._model.getActions(),
        self._tmp_diff: self._tmp_diff_shared
    }

    self._critic_regularization = (
        self._critic_regularization_weight *
        lasagne.regularization.regularize_network_params(
            self._model.getCriticNetwork(), lasagne.regularization.l2))
    self._actor_regularization = (
        (self._regularization_weight *
         lasagne.regularization.regularize_network_params(
             self._model.getActorNetwork(), lasagne.regularization.l2)))
    if (self.getSettings()['use_previous_value_regularization']):
        # Add a penalty on the change between actor and target actor.
        self._actor_regularization = self._actor_regularization + (
            (self.getSettings()['previous_value_regularization_weight']) *
            change_penalty(self._model.getActorNetwork(),
                           self._modelTarget.getActorNetwork()))
    elif ('regularization_type' in self.getSettings() and
          (self.getSettings()['regularization_type'] == 'KL_Divergence')):
        # Replace the L2 term by a KL term between target-actor and actor
        # outputs; both use a constant spread of 'exploration_rate'.
        self._kl_firstfixed = T.mean(
            kl(
                self._q_valsActTarget,
                T.ones_like(self._q_valsActTarget) *
                self.getSettings()['exploration_rate'], self._q_valsActA,
                T.ones_like(self._q_valsActA) *
                self.getSettings()['exploration_rate'],
                self._action_length))
        self._actor_regularization = (self._kl_firstfixed) * (
            self.getSettings()['kl_divergence_threshold'])
        print("Using regularization type : ",
              self.getSettings()['regularization_type'])

    # Critic (value function) update
    self._value_grad = T.grad(self._loss + self._critic_regularization,
                              self._params)
    if (self.getSettings()['optimizer'] == 'rmsprop'):
        print("Optimizing Value Function with ",
              self.getSettings()['optimizer'], " method")
        # NOTE(review): this branch uses self._learning_rate whereas the
        # other critic branches use self._critic_learning_rate -- confirm
        # this is intended.
        self._updates_ = lasagne.updates.rmsprop(self._value_grad,
                                                 self._params,
                                                 self._learning_rate,
                                                 self._rho,
                                                 self._rms_epsilon)
    elif (self.getSettings()['optimizer'] == 'momentum'):
        print("Optimizing Value Function with ",
              self.getSettings()['optimizer'], " method")
        self._updates_ = lasagne.updates.momentum(
            self._value_grad,
            self._params,
            self._critic_learning_rate,
            momentum=self._rho)
    elif (self.getSettings()['optimizer'] == 'adam'):
        print("Optimizing Value Function with ",
              self.getSettings()['optimizer'], " method")
        self._updates_ = lasagne.updates.adam(self._value_grad,
                                              self._params,
                                              self._critic_learning_rate,
                                              beta1=0.9,
                                              beta2=0.9,
                                              epsilon=self._rms_epsilon)
    elif (self.getSettings()['optimizer'] == 'adagrad'):
        print("Optimizing Value Function with ",
              self.getSettings()['optimizer'], " method")
        self._updates_ = lasagne.updates.adagrad(
            self._value_grad,
            self._params,
            self._critic_learning_rate,
            epsilon=self._rms_epsilon)
    else:
        print("Unknown optimization method: ",
              self.getSettings()['optimizer'])
        sys.exit(-1)

    # Actor update: mean squared difference between the supplied actions and
    # the non-deterministic actor output, weighted element-wise by _tmp_diff.
    self._actDiff = (self._model.getActionSymbolicVariable() -
                     self._q_valsActA_drop)
    self._actLoss_ = theano.tensor.elemwise.Elemwise(theano.scalar.mul)(
        (T.mean(T.pow(self._actDiff, 2), axis=1)), (self._tmp_diff))
    self._actLoss = T.mean(self._actLoss_)
    self._policy_grad = T.grad(self._actLoss + self._actor_regularization,
                               self._actionParams)
    if (self.getSettings()['optimizer'] == 'rmsprop'):
        self._actionUpdates = lasagne.updates.rmsprop(
            self._policy_grad, self._actionParams, self._learning_rate,
            self._rho, self._rms_epsilon)
    elif (self.getSettings()['optimizer'] == 'momentum'):
        self._actionUpdates = lasagne.updates.momentum(self._policy_grad,
                                                       self._actionParams,
                                                       self._learning_rate,
                                                       momentum=self._rho)
    elif (self.getSettings()['optimizer'] == 'adam'):
        self._actionUpdates = lasagne.updates.adam(
            self._policy_grad,
            self._actionParams,
            self._learning_rate,
            beta1=0.9,
            beta2=0.999,
            epsilon=self._rms_epsilon)
    elif (self.getSettings()['optimizer'] == 'adagrad'):
        self._actionUpdates = lasagne.updates.adagrad(
            self._policy_grad,
            self._actionParams,
            self._learning_rate,
            epsilon=self._rms_epsilon)
    else:
        # Only reachable if the 'optimizer' setting changed since the critic
        # branch above, which already exits on an unknown value.
        print("Unknown optimization method: ",
              self.getSettings()['optimizer'])

    self._givens_grad = {
        self._model.getStateSymbolicVariable(): self._model.getStates(),
    }

    ## Bellman error
    self._bellman = self._target - self._q_funcTarget

    ### Give v(s') the next state and v(s) (target) the current state
    self._diff_adv = (self._discount_factor *
                      self._q_func) - (self._q_valsTargetNextState)
    # Note the deliberate swap: the state variable is fed the result states
    # and the result-state variable is fed the states.
    self._diff_adv_givens = {
        self._model.getStateSymbolicVariable():
        self._model.getResultStates(),
        self._model.getResultStateSymbolicVariable():
        self._model.getStates(),
    }

    Distillation.compile(self)
def get_output_mask(self, train=False):
    """Return a symbolic mask over the layer input.

    An entry of the result is truthy when at least one element along the
    last axis of the input differs from ``self.mask_value``.
    """
    inputs = self.get_input(train)
    # 1 where the element differs from the mask value, 0 where it equals it.
    not_masked = 1. - T.eq(inputs, self.mask_value)
    weighted = T.ones_like(inputs) * not_masked
    return T.any(weighted, axis=-1)
def __init__(self, model, n_in, n_out, state_bounds, action_bounds,
             reward_bound, settings_):
    """Set up expert policies and the Theano graph used for distillation.

    The arguments are passed straight through to the parent constructor.
    Afterwards the constructor loads one expert policy per entry of the
    ``expert_policy_files`` setting, creates the actor buffers and helper
    variables, builds critic/actor outputs for both the model and a deep
    copy of it (the target model), derives losses, regularisation and
    optimizer updates, and ends by calling ``Distillation.compile(self)``.

    The process exits with status -1 if the ``optimizer`` setting is not
    ``rmsprop``, ``momentum``, ``adam`` or ``adagrad``.

    NOTE(review): ``_model``, ``_batch_size``, ``_learning_rate``, ``_rho``,
    ``_rms_epsilon``, ``_discount_factor``, ``_regularization_weight`` and
    ``_action_length`` are only read here; presumably they come from the
    parent constructor -- confirm.
    """
    super(Distillation, self).__init__(model, n_in, n_out, state_bounds,
                                       action_bounds, reward_bound, settings_)

    ### Load expert policy files
    # NOTE(security): unpickling with dill can run arbitrary code; the
    # configured expert files must come from a trusted source.
    self._expert_policies = []
    last_loaded_path = ""
    for expert_dir in self.getSettings()['expert_policy_files']:
        file_name = (expert_dir + '/' + self.getSettings()['model_type'] +
                     '/' + getAgentName() + '.pkl')
        if file_name != last_loaded_path:
            print("Loading pre compiled network: ", file_name)
            # Context manager closes the file even when dill.load raises.
            with open(file_name, 'rb') as f:
                model_ = dill.load(f)
        # A repeated path reuses the already loaded expert to save memory.
        self._expert_policies.append(model_)
        last_loaded_path = file_name

    self._actor_buffer_states = []
    self._actor_buffer_result_states = []
    self._actor_buffer_actions = []
    self._actor_buffer_rewards = []
    self._actor_buffer_falls = []
    self._actor_buffer_diff = []

    ## because float64 <= float32 * int32, need to use int16 or int8
    self._NotFallen = T.bcol("Not_Fallen")
    self._NotFallen.tag.test_value = np.zeros((self._batch_size, 1),
                                              dtype=np.dtype('int8'))
    self._NotFallen_shared = theano.shared(
        np.zeros((self._batch_size, 1), dtype='int8'),
        broadcastable=(False, True))

    self._tmp_diff = T.col("Tmp_Diff")
    self._tmp_diff.tag.test_value = np.zeros(
        (self._batch_size, 1),
        dtype=np.dtype(self.getSettings()['float_type']))
    # Shared variable backing _tmp_diff, initialised to zeros.
    self._tmp_diff_shared = theano.shared(
        np.zeros((self._batch_size, 1),
                 dtype=self.getSettings()['float_type']),
        broadcastable=(False, True))

    self._critic_regularization_weight = self.getSettings(
    )["critic_regularization_weight"]
    self._critic_learning_rate = self.getSettings()["critic_learning_rate"]

    ## Target network: deep copy of the model handed to the constructor.
    self._modelTarget = copy.deepcopy(model)

    # Critic of the model on the current state, deterministic pass.
    self._q_valsA = lasagne.layers.get_output(
        self._model.getCriticNetwork(),
        self._model.getStateSymbolicVariable(),
        deterministic=True)
    # Same critic, non-deterministic pass.
    self._q_valsA_drop = lasagne.layers.get_output(
        self._model.getCriticNetwork(),
        self._model.getStateSymbolicVariable(),
        deterministic=False)
    # Critic of the model on the result (next) state.
    self._q_valsNextState = lasagne.layers.get_output(
        self._model.getCriticNetwork(),
        self._model.getResultStateSymbolicVariable(),
        deterministic=True)
    # Target critic on the result (next) state.
    self._q_valsTargetNextState = lasagne.layers.get_output(
        self._modelTarget.getCriticNetwork(),
        self._model.getResultStateSymbolicVariable(),
        deterministic=True)
    # Target critic on the current state, deterministic pass.
    self._q_valsTarget = lasagne.layers.get_output(
        self._modelTarget.getCriticNetwork(),
        self._model.getStateSymbolicVariable(),
        deterministic=True)
    # Target critic on the current state, non-deterministic pass.
    self._q_valsTarget_drop = lasagne.layers.get_output(
        self._modelTarget.getCriticNetwork(),
        self._model.getStateSymbolicVariable(),
        deterministic=False)
    # Actor of the model, deterministic pass.
    self._q_valsActA = lasagne.layers.get_output(
        self._model.getActorNetwork(),
        self._model.getStateSymbolicVariable(),
        deterministic=True)
    # Actor of the target model, deterministic pass.
    self._q_valsActTarget = lasagne.layers.get_output(
        self._modelTarget.getActorNetwork(),
        self._model.getStateSymbolicVariable(),
        deterministic=True)
    # Actor of the model, non-deterministic pass.
    self._q_valsActA_drop = lasagne.layers.get_output(
        self._model.getActorNetwork(),
        self._model.getStateSymbolicVariable(),
        deterministic=False)

    self._q_func = self._q_valsA
    self._q_funcTarget = self._q_valsTarget
    self._q_func_drop = self._q_valsA_drop
    self._q_funcTarget_drop = self._q_valsTarget_drop
    self._q_funcAct = self._q_valsActA
    self._q_funcAct_drop = self._q_valsActA_drop

    # TD target: reward plus discounted target-critic value of next state.
    self._target = self._model.getRewardSymbolicVariable() + (
        self._discount_factor * self._q_valsTargetNextState)
    # TD error against the model's critic (deterministic and "_drop").
    self._diff = self._target - self._q_func
    self._diff_drop = self._target - self._q_func_drop
    loss = T.pow(self._diff, 2)
    # Critic loss: mean squared TD error.
    self._loss = T.mean(loss)
    self._loss_drop = T.mean(0.5 * self._diff_drop**2)

    self._params = lasagne.layers.helper.get_all_params(
        self._model.getCriticNetwork())
    self._actionParams = lasagne.layers.helper.get_all_params(
        self._model.getActorNetwork())

    self._givens_ = {
        self._model.getStateSymbolicVariable():
        self._model.getStates(),
        self._model.getResultStateSymbolicVariable():
        self._model.getResultStates(),
        self._model.getRewardSymbolicVariable():
        self._model.getRewards()
    }
    self._actGivens = {
        self._model.getStateSymbolicVariable(): self._model.getStates(),
        self._model.getActionSymbolicVariable(): self._model.getActions(),
        self._tmp_diff: self._tmp_diff_shared
    }

    self._critic_regularization = (
        self._critic_regularization_weight *
        lasagne.regularization.regularize_network_params(
            self._model.getCriticNetwork(), lasagne.regularization.l2))
    self._actor_regularization = (
        (self._regularization_weight *
         lasagne.regularization.regularize_network_params(
             self._model.getActorNetwork(), lasagne.regularization.l2)))
    if (self.getSettings()['use_previous_value_regularization']):
        self._actor_regularization = self._actor_regularization + (
            (self.getSettings()['previous_value_regularization_weight']) *
            change_penalty(self._model.getActorNetwork(),
                           self._modelTarget.getActorNetwork()))
    elif ('regularization_type' in self.getSettings() and
          (self.getSettings()['regularization_type'] == 'KL_Divergence')):
        # KL term between target-actor and actor outputs replaces the L2
        # actor regularisation in this mode.
        self._kl_firstfixed = T.mean(
            kl(
                self._q_valsActTarget,
                T.ones_like(self._q_valsActTarget) *
                self.getSettings()['exploration_rate'], self._q_valsActA,
                T.ones_like(self._q_valsActA) *
                self.getSettings()['exploration_rate'],
                self._action_length))
        self._actor_regularization = (self._kl_firstfixed) * (
            self.getSettings()['kl_divergence_threshold'])
        print("Using regularization type : ",
              self.getSettings()['regularization_type'])

    # SGD update for the critic
    self._value_grad = T.grad(self._loss + self._critic_regularization,
                              self._params)
    if (self.getSettings()['optimizer'] == 'rmsprop'):
        print("Optimizing Value Function with ",
              self.getSettings()['optimizer'], " method")
        # NOTE(review): uses self._learning_rate while the other critic
        # branches use self._critic_learning_rate -- confirm intent.
        self._updates_ = lasagne.updates.rmsprop(self._value_grad,
                                                 self._params,
                                                 self._learning_rate,
                                                 self._rho,
                                                 self._rms_epsilon)
    elif (self.getSettings()['optimizer'] == 'momentum'):
        print("Optimizing Value Function with ",
              self.getSettings()['optimizer'], " method")
        self._updates_ = lasagne.updates.momentum(
            self._value_grad,
            self._params,
            self._critic_learning_rate,
            momentum=self._rho)
    elif (self.getSettings()['optimizer'] == 'adam'):
        print("Optimizing Value Function with ",
              self.getSettings()['optimizer'], " method")
        self._updates_ = lasagne.updates.adam(self._value_grad,
                                              self._params,
                                              self._critic_learning_rate,
                                              beta1=0.9,
                                              beta2=0.9,
                                              epsilon=self._rms_epsilon)
    elif (self.getSettings()['optimizer'] == 'adagrad'):
        print("Optimizing Value Function with ",
              self.getSettings()['optimizer'], " method")
        self._updates_ = lasagne.updates.adagrad(
            self._value_grad,
            self._params,
            self._critic_learning_rate,
            epsilon=self._rms_epsilon)
    else:
        print("Unknown optimization method: ",
              self.getSettings()['optimizer'])
        sys.exit(-1)

    ## Actor update
    # Difference between the supplied actions and the non-deterministic
    # actor output of the model.
    self._actDiff = (self._model.getActionSymbolicVariable() -
                     self._q_valsActA_drop)
    # Per-sample mean squared difference, weighted element-wise by _tmp_diff.
    self._actLoss_ = theano.tensor.elemwise.Elemwise(theano.scalar.mul)(
        (T.mean(T.pow(self._actDiff, 2), axis=1)), (self._tmp_diff))
    self._actLoss = T.mean(self._actLoss_)
    self._policy_grad = T.grad(self._actLoss + self._actor_regularization,
                               self._actionParams)
    if (self.getSettings()['optimizer'] == 'rmsprop'):
        self._actionUpdates = lasagne.updates.rmsprop(
            self._policy_grad, self._actionParams, self._learning_rate,
            self._rho, self._rms_epsilon)
    elif (self.getSettings()['optimizer'] == 'momentum'):
        self._actionUpdates = lasagne.updates.momentum(self._policy_grad,
                                                       self._actionParams,
                                                       self._learning_rate,
                                                       momentum=self._rho)
    elif (self.getSettings()['optimizer'] == 'adam'):
        self._actionUpdates = lasagne.updates.adam(
            self._policy_grad,
            self._actionParams,
            self._learning_rate,
            beta1=0.9,
            beta2=0.999,
            epsilon=self._rms_epsilon)
    elif (self.getSettings()['optimizer'] == 'adagrad'):
        self._actionUpdates = lasagne.updates.adagrad(
            self._policy_grad,
            self._actionParams,
            self._learning_rate,
            epsilon=self._rms_epsilon)
    else:
        # Only reachable if the 'optimizer' setting changed since the critic
        # branch above, which already exits on an unknown value.
        print("Unknown optimization method: ",
              self.getSettings()['optimizer'])

    self._givens_grad = {
        self._model.getStateSymbolicVariable(): self._model.getStates()
    }

    ## Bellman error
    self._bellman = self._target - self._q_funcTarget

    ### Give v(s') the next state and v(s) (target) the current state
    # discount * critic output minus the target critic's next-state output.
    self._diff_adv = (self._discount_factor *
                      self._q_func) - (self._q_valsTargetNextState)
    # States and result states are deliberately swapped in these givens.
    self._diff_adv_givens = {
        self._model.getStateSymbolicVariable():
        self._model.getResultStates(),
        self._model.getResultStateSymbolicVariable():
        self._model.getStates(),
    }

    Distillation.compile(self)
def train_conv_net(datasets,
                   U,
                   ofile,
                   cv=0,
                   attr=0,
                   img_w=300,
                   filter_hs=[3, 4, 5],
                   hidden_units=[100, 2],
                   dropout_rate=[0.5],
                   shuffle_batch=True,
                   n_epochs=25,
                   batch_size=50,
                   lr_decay=0.95,
                   conv_non_linear="relu",
                   activations=[Iden],
                   sqr_norm_lim=9,
                   non_static=True):
    """
    Train a simple conv net and return ``(test_perf, fscore)``.

    datasets = indexable collection used as follows:
        [0] training inputs, [1] training labels,
        [2] test inputs,     [3] test labels,
        [4] extra ("Mairesse") features for training,
        [5] extra ("Mairesse") features for test
    U = initial word-embedding matrix (stored in a shared variable)
    ofile = writable file object that receives per-epoch progress lines
    cv, attr = only used to name the pickle files with dumped features
    img_h = sentence length (padded where necessary)
    img_w = word vector length (300 for word2vec)
    filter_hs = filter window sizes
    hidden_units = [x,y] x is the number of feature maps (per filter window), and y is the penultimate layer
    sqr_norm_lim = s^2 in the paper
    lr_decay = adadelta decay parameter

    Side effects: prints progress, writes to ``ofile`` and, whenever the
    validation performance does not get worse, pickles the features to
    ``cvte<attr><cv>.p`` / ``cvtr<attr><cv>.p`` in the working directory.
    """
    # Work on a private copy: hidden_units[0] is overwritten further down,
    # which must neither leak into the shared default list (it would change
    # feature_maps on the next call) nor into the caller's list.
    hidden_units = list(hidden_units)

    rng = np.random.RandomState(3435)
    img_h = len(datasets[0][0][0])
    filter_w = img_w
    feature_maps = hidden_units[0]
    filter_shapes = []
    pool_sizes = []
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h - filter_h + 1, img_w - filter_w + 1))
    parameters = [("image shape", img_h, img_w),
                  ("filter shape", filter_shapes),
                  ("hidden_units", hidden_units), ("dropout", dropout_rate),
                  ("batch_size", batch_size), ("non_static", non_static),
                  ("learn_decay", lr_decay),
                  ("conv_non_linear", conv_non_linear),
                  ("non_static", non_static), ("sqr_norm_lim", sqr_norm_lim),
                  ("shuffle_batch", shuffle_batch)]
    print(parameters)

    # define model architecture
    index = T.lscalar()
    x = T.tensor3('x', dtype=theano.config.floatX)
    y = T.ivector('y')
    mair = T.dmatrix('mair')
    Words = theano.shared(value=U, name="Words")
    zero_vec_tensor = T.vector(dtype=theano.config.floatX)
    zero_vec = np.zeros(img_w, dtype=theano.config.floatX)
    # Resets row 0 of the embedding matrix; called after every update with
    # an all-zero vector.
    set_zero = theano.function([zero_vec_tensor],
                               updates=[
                                   (Words,
                                    T.set_subtensor(Words[0, :],
                                                    zero_vec_tensor))
                               ],
                               allow_input_downcast=True)

    conv_layers = []
    for filter_shape, pool_size in zip(filter_shapes, pool_sizes):
        conv_layer = LeNetConvPoolLayer(rng,
                                        image_shape=None,
                                        filter_shape=filter_shape,
                                        poolsize=pool_size,
                                        non_linear=conv_non_linear)
        conv_layers.append(conv_layer)

    # Look up the embedding of every index in x; result is 4-dimensional.
    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], x.shape[1], x.shape[2], Words.shape[1]))

    def convolve_user_statuses(statuses):
        layer1_inputs = []

        def sum_mat(mat, out):
            # z is 1 for a matrix with non-zero sum, 0 otherwise; the scan
            # stops at the first matrix whose sum is zero.
            z = ifelse(
                T.neq(T.sum(mat, dtype=theano.config.floatX),
                      T.constant(0, dtype=theano.config.floatX)),
                T.constant(1, dtype=theano.config.floatX),
                T.constant(0, dtype=theano.config.floatX))
            return out + z, theano.scan_module.until(
                T.eq(z, T.constant(0, dtype=theano.config.floatX)))

        status_count, _ = theano.scan(fn=sum_mat,
                                      sequences=statuses,
                                      outputs_info=T.constant(
                                          0, dtype=theano.config.floatX))

        # Slice-out dummy (zeroed) sentences
        relv_input = statuses[:T.cast(status_count[-1], dtype='int32'
                                      )].dimshuffle(0, 'x', 1, 2)

        for conv_layer in conv_layers:
            layer1_inputs.append(
                conv_layer.set_input(input=relv_input).flatten(2))

        features = T.concatenate(layer1_inputs, axis=1)

        # Element-wise maximum over the remaining statuses.
        pooled_feat = T.max(features, axis=0)

        return pooled_feat

    conv_feats, _ = theano.scan(fn=convolve_user_statuses,
                                sequences=layer0_input)

    # Add Mairesse features
    layer1_input = T.concatenate([conv_feats, mair], axis=1)
    # Input width of the MLP: convolutional features plus extra features.
    hidden_units[0] = feature_maps * len(filter_hs) + datasets[4].shape[1]
    classifier = MLPDropout(rng,
                            input=layer1_input,
                            layer_sizes=hidden_units,
                            activations=activations,
                            dropout_rates=dropout_rate)

    svm_data = T.concatenate(
        [classifier.layers[0].output,
         y.dimshuffle(0, 'x')], axis=1)

    # define parameters of the model and update functions using adadelta
    # NOTE(review): if classifier.params is a list, += extends it in place
    # (params is an alias, not a copy) -- kept as in the original.
    params = classifier.params
    for conv_layer in conv_layers:
        params += conv_layer.params
    if non_static:
        # if word vectors are allowed to change, add them as model parameters
        params += [Words]
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)
    grad_updates = sgd_updates_adadelta(params, dropout_cost, lr_decay, 1e-6,
                                        sqr_norm_lim)

    # shuffle dataset and assign to mini batches. if dataset size is not a
    # multiple of mini batches, replicate extra data (at random)
    np.random.seed(3435)
    if datasets[0].shape[0] % batch_size > 0:
        extra_data_num = batch_size - datasets[0].shape[0] % batch_size
        rand_perm = np.random.permutation(range(len(datasets[0])))
        shuffled_x = datasets[0][rand_perm]
        shuffled_y = datasets[1][rand_perm]
        shuffled_m = datasets[4][rand_perm]
        new_data_x = np.append(datasets[0],
                               shuffled_x[:extra_data_num],
                               axis=0)
        new_data_y = np.append(datasets[1],
                               shuffled_y[:extra_data_num],
                               axis=0)
        new_data_m = np.append(datasets[4],
                               shuffled_m[:extra_data_num],
                               axis=0)
    else:
        new_data_x = datasets[0]
        new_data_y = datasets[1]
        new_data_m = datasets[4]
    rand_perm = np.random.permutation(range(len(new_data_x)))
    new_data_x = new_data_x[rand_perm]
    new_data_y = new_data_y[rand_perm]
    new_data_m = new_data_m[rand_perm]
    # Floor division keeps the batch counts integral (they feed range()).
    n_batches = new_data_x.shape[0] // batch_size
    n_train_batches = int(np.round(n_batches * 0.9))

    # divide train set into train/val sets
    test_set_x = datasets[2]
    test_set_y = np.asarray(datasets[3], "int32")
    test_set_m = datasets[5]
    n_train_rows = n_train_batches * batch_size
    train_set_x, train_set_y, train_set_m = shared_dataset(
        (new_data_x[:n_train_rows], new_data_y[:n_train_rows],
         new_data_m[:n_train_rows]))
    val_set_x, val_set_y, val_set_m = shared_dataset(
        (new_data_x[n_train_rows:], new_data_y[n_train_rows:],
         new_data_m[n_train_rows:]))
    n_val_batches = n_batches - n_train_batches
    val_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: val_set_x[index * batch_size:(index + 1) * batch_size],
            y: val_set_y[index * batch_size:(index + 1) * batch_size],
            mair: val_set_m[index * batch_size:(index + 1) * batch_size]
        },
        allow_input_downcast=False)

    # compile theano functions to get train/val/test errors
    # test_model evaluates a *training* batch and also returns svm_data.
    test_model = theano.function(
        [index], [classifier.errors(y), svm_data],
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            mair: train_set_m[index * batch_size:(index + 1) * batch_size]
        },
        allow_input_downcast=True)
    train_model = theano.function(
        [index],
        cost,
        updates=grad_updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            mair: train_set_m[index * batch_size:(index + 1) * batch_size]
        },
        allow_input_downcast=True)

    test_y_pred = classifier.predict(layer1_input)
    test_error = T.sum(T.neq(test_y_pred, y), dtype=theano.config.floatX)
    true_p = T.sum(test_y_pred * y, dtype=theano.config.floatX)
    # (v + 1) mod 2 flips a 0/1 value, so these count pred=1,y=0 and
    # pred=0,y=1 respectively.
    false_p = T.sum(test_y_pred *
                    T.mod(y + T.ones_like(y, dtype=theano.config.floatX),
                          T.constant(2, dtype='int32')))
    false_n = T.sum(
        y * T.mod(test_y_pred + T.ones_like(y, dtype=theano.config.floatX),
                  T.constant(2, dtype='int32')))
    test_model_all = theano.function(
        [x, y, mair], [test_error, true_p, false_p, false_n, svm_data],
        allow_input_downcast=True)

    test_batches = test_set_x.shape[0] // batch_size

    # start training over mini-batches
    print('... training')
    epoch = 0
    best_val_perf = 0
    val_perf = 0
    test_perf = 0
    fscore = 0
    while (epoch < n_epochs):
        start_time = time.time()
        epoch = epoch + 1
        if shuffle_batch:
            batch_order = np.random.permutation(range(n_train_batches))
        else:
            batch_order = range(n_train_batches)
        for minibatch_index in batch_order:
            train_model(minibatch_index)
            set_zero(zero_vec)
        train_losses = [test_model(i) for i in range(n_train_batches)]
        train_perf = 1 - np.mean([loss[0] for loss in train_losses])
        val_losses = [val_model(i) for i in range(n_val_batches)]
        val_perf = 1 - np.mean(val_losses)
        epoch_perf = 'epoch: %i, training time: %.2f secs, train perf: %.2f %%, val perf: %.2f %%' % (
            epoch, time.time() - start_time, train_perf * 100.,
            val_perf * 100.)
        print(epoch_perf)
        ofile.write(epoch_perf + "\n")
        ofile.flush()
        if val_perf >= best_val_perf:
            best_val_perf = val_perf
            test_loss_list = [
                test_model_all(
                    test_set_x[idx * batch_size:(idx + 1) * batch_size],
                    test_set_y[idx * batch_size:(idx + 1) * batch_size],
                    test_set_m[idx * batch_size:(idx + 1) * batch_size])
                for idx in range(test_batches)
            ]
            # Evaluate the final, partial batch if there is one.
            if test_set_x.shape[0] > test_batches * batch_size:
                test_loss_list.append(
                    test_model_all(test_set_x[test_batches * batch_size:],
                                   test_set_y[test_batches * batch_size:],
                                   test_set_m[test_batches * batch_size:]))
            test_loss_list_temp = test_loss_list
            # Drop the trailing svm_data entry to get a numeric array.
            test_loss_list = np.asarray([t[:-1] for t in test_loss_list])
            test_loss = np.sum(test_loss_list[:, 0]) / float(
                test_set_x.shape[0])
            test_perf = 1 - test_loss
            tp = np.sum(test_loss_list[:, 1])
            fp = np.sum(test_loss_list[:, 2])
            fn = np.sum(test_loss_list[:, 3])
            tn = test_set_x.shape[0] - (tp + fp + fn)
            # Mean of the F-scores of the positive and the negative class.
            fscore = np.mean([
                2 * tp / float(2 * tp + fp + fn),
                2 * tn / float(2 * tn + fp + fn)
            ])
            svm_test = np.concatenate([t[-1] for t in test_loss_list_temp],
                                      axis=0)
            svm_train = np.concatenate([t[1] for t in train_losses], axis=0)
            output = "Test result: accu: " + str(
                test_perf) + ", macro_fscore: " + str(fscore) + "\ntp: " + str(
                    tp) + " tn:" + str(tn) + " fp: " + str(fp) + " fn: " + str(
                        fn)
            print(output)
            ofile.write(output + "\n")
            ofile.flush()
            # dump train and test features
            with open("cvte" + str(attr) + str(cv) + ".p", "wb") as dump_file:
                cPickle.dump(svm_test, dump_file)
            with open("cvtr" + str(attr) + str(cv) + ".p", "wb") as dump_file:
                cPickle.dump(svm_train, dump_file)
        # Allow the number of epochs to be changed while training runs.
        updated_epochs = refresh_epochs()
        if updated_epochs is not None and n_epochs != updated_epochs:
            n_epochs = updated_epochs
            print('Epochs updated to ' + str(n_epochs))
    return test_perf, fscore
def build_model(self):
    """Assemble the symbolic Q-learning graph and compile its functions.

    Builds the online network (plus a separate target network when
    ``self.clone_interval > 0``), derives the TD target and the loss, and
    stores two compiled functions on the instance:

    * ``self.train_fn``   -- takes no arguments, returns ``[loss]`` and applies
      the optimizer updates, reading its data through
      ``self.shared_batch.givens(...)``.
    * ``self.predict_fn`` -- takes no arguments and returns the first row of
      the deterministic Q-values, reading through
      ``self.shared_state.givens(...)``.
    """
    views_curr = T.tensor4('views')
    action_hists_curr = T.tensor4('action_hists')
    actions = T.icol('actions')
    views_next = T.tensor4('next_views')
    action_hists_next = T.tensor4('next_action_hists')
    rewards = T.col('rewards')
    terminals = T.icol('terminals')

    def feed(view_layer, hist_layer, views, hists):
        # The action-history input layer is only fed when its width is > 0.
        if self.action_hist_size.w > 0:
            return {view_layer: views, hist_layer: hists}
        return {view_layer: views}

    get_output = lasagne.layers.get_output

    # initialize network(s) for computing q-values
    online_view_in, online_hist_in, self.net_online_out, self.all_layers = \
        self.build_network(self.network_builder, self.view_size,
                           self.action_hist_size)
    online_curr = feed(online_view_in, online_hist_in,
                       views_curr, action_hists_curr)

    # Predictions always come from the online network on the current state:
    # stochastic pass for training, deterministic pass for acting.
    q_vals_pred_train = get_output(self.net_online_out, online_curr,
                                   deterministic=False)
    q_vals_pred_test = get_output(self.net_online_out, online_curr,
                                  deterministic=True)

    if self.clone_interval > 0:
        target_view_in, target_hist_in, self.net_target_out, _ = \
            self.build_network(self.network_builder, self.view_size,
                               self.action_hist_size)
        self._clone()

        # q-values of the next state as estimated by the target network
        q_next_target = get_output(
            self.net_target_out,
            feed(target_view_in, target_hist_in,
                 views_next, action_hists_next))

        if self.double_q:
            # Double Q-Learning: the online network picks the action on the
            # next state ...
            q_next_online = get_output(
                self.net_online_out,
                feed(online_view_in, online_hist_in,
                     views_next, action_hists_next))
            best_next_action = T.argmax(q_next_online, axis=1,
                                        keepdims=False)
            # ... and the target network supplies the value of that action.
            q_vals_target = T.diagonal(
                T.take(q_next_target, best_next_action,
                       axis=1)).reshape((-1, 1))
        else:
            q_vals_target = T.max(q_next_target, axis=1, keepdims=True)
    else:
        # No target network: bootstrap from the online network itself.
        q_next_online = get_output(
            self.net_online_out,
            feed(online_view_in, online_hist_in,
                 views_next, action_hists_next))
        q_vals_target = T.max(q_next_online, axis=1, keepdims=True)

    # define loss computation
    action_ids = T.arange(len(self.actions)).reshape((1, -1))
    actionmask = T.eq(action_ids, actions.reshape((-1, 1))).astype(
        theano.config.floatX)
    terminals_float = terminals.astype(theano.config.floatX)
    not_terminal = T.ones_like(terminals_float) - terminals_float
    target = rewards + not_terminal * self.discount * q_vals_target
    output = (q_vals_pred_train * actionmask).sum(axis=1).reshape((-1, 1))
    diff = target - output

    if self.clip_delta > 0:
        # see https://github.com/spragunr/deep_q_rl/blob/master/deep_q_rl/q_network.py
        abs_diff = abs(diff)
        quadratic_part = T.minimum(abs_diff, self.clip_delta)
        linear_part = abs_diff - quadratic_part
        loss = quadratic_part**2 + self.clip_delta * linear_part
    else:
        loss = diff**2

    # regularization
    if self.all_layers is not None and self.regularization > 0:
        penalty = sum(
            regularize_layer_params(layer, l2) * self.regularization
            for layer in self.all_layers)
        loss = T.mean(loss) + penalty
    else:
        loss = T.mean(loss)

    # define network update for training
    params = lasagne.layers.helper.get_all_params(self.net_online_out,
                                                  trainable=True)
    updates = self.optimizer(loss, params)
    train_givens = self.shared_batch.givens(
        views_curr, action_hists_curr, actions, views_next,
        action_hists_next, rewards, terminals)
    self.train_fn = theano.function([], [loss], updates=updates,
                                    givens=train_givens)

    # define output prediction
    predict_givens = self.shared_state.givens(views_curr, action_hists_curr)
    self.predict_fn = theano.function([], q_vals_pred_test[0],
                                      givens=predict_givens)
def get_output_mask(self, train=None):
    """Return a mask that is zero wherever the layer input equals 0.

    The input is always fetched via ``self.get_input(train)``. When
    ``self.mask_zero`` is falsy no mask is produced and ``None`` is returned;
    otherwise the result is ``ones_like(X) * (1 - eq(X, 0))``.
    """
    X = self.get_input(train)
    if self.mask_zero:
        return T.ones_like(X) * (1 - T.eq(X, 0))
    return None
# Toy Q-network used to sanity-check indexing, max and terminal masking.
terminal_var = T.col(dtype='int8')

# input -> 2x2 conv (3 filters, stride 1) -> dense(10) -> linear dense(num_action)
l_in = lasagne.layers.InputLayer((mbsize, channel, height, width), input_var)
l_conv = lasagne.layers.Conv2DLayer(l_in, num_filters=3, filter_size=(2, 2),
                                    stride=(1, 1))
l_hidden = lasagne.layers.DenseLayer(l_conv, num_units=10)
network = lasagne.layers.DenseLayer(l_hidden, num_units=num_action,
                                    nonlinearity=None)
network_out = lasagne.layers.get_output(network)

# Output of the taken action for each row of the minibatch.
taken_actions = action_var.reshape((-1, ))
network_indexing = network_out[T.arange(mbsize), taken_actions]

# Row-wise maximum, kept as a column.
network_max = T.max(network_out, axis=1).reshape((-1, 1))

# Discounted maximum, zeroed where terminal_var is 1.
not_terminal = T.ones_like(terminal_var) - terminal_var
network_discount = gamma * network_max * not_terminal

f = theano.function([input_var], network_out)
g = theano.function([input_var, action_var], network_indexing)
h = theano.function([input_var], network_max)
j = theano.function([input_var, terminal_var], network_discount)

inp = np.uint8(np.random.randint(0, 256, (mbsize, channel, height, width)))
print("inp.shape %s" % (inp.shape,))
act = np.uint8(np.random.randint(0, num_action, (mbsize, 1)))
print(act.reshape(-1, ))
out = f(inp)
print("out %s" % (out,))
import theano
import theano.tensor as T

k = T.iscalar("k")
A = T.vector("A")


def _multiply_step(prior_result, base):
    """One scan step: multiply the running product by ``base``."""
    return prior_result * base


# Symbolic description of the result: start from ones and multiply by A
# for k steps.
result, updates = theano.scan(fn=_multiply_step,
                              outputs_info=T.ones_like(A),
                              non_sequences=A,
                              n_steps=k)

# Only the last step is needed; indexing it lets Theano avoid keeping the
# intermediate results.
final_result = result[-1]

# Compiled function that returns A**k
power = theano.function(inputs=[A, k], outputs=final_result, updates=updates)

print(power(range(10), 2))
print(power(range(10), 4))
def __init__(self, input_width, input_height, num_actions, num_frames,
             discount, learning_rate, rho, rms_epsilon, momentum,
             clip_delta, freeze_interval, batch_size, network_type,
             update_rule, lambda_reg, batch_accumulator, pretrained_net,
             rng, input_scale=255.0):
    """Build the Q-network(s), the loss and the compiled Theano functions.

    Stores the hyper-parameters on the instance, builds the network via
    ``self.build_network`` (and a second, frozen copy when
    ``freeze_interval > 0``), allocates shared minibatch buffers and
    compiles ``self._train`` and ``self._q_vals``.

    ``pretrained_net`` is accepted but not used by this constructor.

    Raises
    ------
    ValueError
        If ``batch_accumulator`` is not ``'sum'``/``'mean'`` or
        ``update_rule`` is not one of ``'deepmind_rmsprop'``, ``'rmsprop'``,
        ``'sgd'``.
    """
    self.input_width = input_width
    self.input_height = input_height
    self.num_actions = num_actions
    self.num_frames = num_frames
    self.batch_size = batch_size
    self.discount = discount
    self.rho = rho
    self.lr = learning_rate
    self.rms_epsilon = rms_epsilon
    self.momentum = momentum
    self.clip_delta = clip_delta
    self.freeze_interval = freeze_interval
    self.rng = rng
    self.lambda_reg = lambda_reg

    # Seed Lasagne before any layer is created.
    lasagne.random.set_rng(self.rng)

    self.update_counter = 0

    net_args = (network_type, input_width, input_height, num_actions,
                num_frames, batch_size)
    (self.l_in, self.l_act_in, self.l_out,
     self.pred_z, self.true_z) = self.build_network(*net_args)
    if self.freeze_interval > 0:
        # Second network used for the bootstrap target; its two auxiliary
        # outputs are discarded.
        (self.next_l_in, self.next_l_act_in, self.next_l_out,
         _unused_pred, _unused_true) = self.build_network(*net_args)
        self.reset_q_hat()

    states = T.tensor4('states')
    next_states = T.tensor4('next_states')
    rewards = T.col('rewards')
    actions = T.imatrix('actions')
    terminals = T.icol('terminals')

    float_x = theano.config.floatX

    def zeros_shared(shape, dtype, **shared_kwargs):
        # Zero-initialised shared buffer of the given shape/dtype.
        return theano.shared(np.zeros(shape, dtype=dtype), **shared_kwargs)

    # Shared variables for training from a minibatch of replayed state
    # transitions: one image stack per sample (consecutive states overlap,
    # hence the single extra frame), plus action, reward and terminal flag.
    self.imgs_shared = zeros_shared(
        (batch_size, num_frames*2+1, input_height, input_width), float_x)
    self.rewards_shared = zeros_shared(
        (batch_size, 1), float_x, broadcastable=(False, True))
    self.actions_shared = zeros_shared((batch_size, num_frames), 'int32')
    self.terminals_shared = zeros_shared(
        (batch_size, 1), 'int32', broadcastable=(False, True))

    # Shared variable for a single state, to calculate q_vals.
    self.state_shared = zeros_shared(
        (num_frames*2, input_height, input_width), float_x)

    q_vals, z_pred, z_true = lasagne.layers.get_output(
        [self.l_out, self.pred_z, self.true_z],
        inputs={self.l_in: states / input_scale,
                self.l_act_in: actions})

    if self.freeze_interval > 0:
        next_q_vals = lasagne.layers.get_output(
            self.next_l_out,
            {self.next_l_in: next_states / input_scale,
             self.next_l_act_in: actions})
    else:
        next_q_vals = lasagne.layers.get_output(
            self.l_out,
            {self.l_in: next_states / input_scale,
             self.l_act_in: actions})
        # Same network on both sides: stop gradients through the target.
        next_q_vals = theano.gradient.disconnected_grad(next_q_vals)

    terminalsX = terminals.astype(float_x)
    # One-hot mask over actions, taken from the first action column.
    actionmask = T.eq(T.arange(num_actions).reshape((1, -1)),
                      actions[:, 0].reshape((-1, 1))).astype(float_x)

    target = (rewards +
              (T.ones_like(terminalsX) - terminalsX) *
              self.discount * T.max(next_q_vals, axis=1, keepdims=True))
    output = (q_vals * actionmask).sum(axis=1).reshape((-1, 1))
    diff = target - output
    diff_reg = z_true - z_pred

    if self.clip_delta > 0:
        # Squaring a clipped diff would give a zero gradient beyond the
        # clip bounds; instead the loss is continued linearly past the
        # clip point so the gradient stays constant there. This equals
        # declaring d loss/d q_vals to be the clipped diff, which is what
        # the DeepMind implementation does.
        abs_diff = abs(diff)
        quadratic_part = T.minimum(abs_diff, self.clip_delta)
        linear_part = abs_diff - quadratic_part
        loss = 0.5 * quadratic_part ** 2 + self.clip_delta * linear_part
    else:
        loss = 0.5 * diff ** 2

    # NOTE(review): `loss` looks like a (batch, 1) column while the
    # regulariser is summed over axis 1; confirm the broadcast of this
    # addition is the intended one.
    loss = loss + 0.5 * self.lambda_reg * (diff_reg ** 2).sum(axis=1)

    if batch_accumulator not in ('sum', 'mean'):
        raise ValueError("Bad accumulator: {}".format(batch_accumulator))
    accumulate = T.sum if batch_accumulator == 'sum' else T.mean
    loss = accumulate(loss)

    params = lasagne.layers.helper.get_all_params(
        [self.l_out, self.pred_z, self.true_z])
    train_givens = {
        states: self.imgs_shared[:, :-1],
        next_states: self.imgs_shared[:, 1:],
        rewards: self.rewards_shared,
        actions: self.actions_shared,
        terminals: self.terminals_shared
    }

    if update_rule not in ('deepmind_rmsprop', 'rmsprop', 'sgd'):
        raise ValueError("Unrecognized update: {}".format(update_rule))
    if update_rule == 'sgd':
        updates = lasagne.updates.sgd(loss, params, self.lr)
    elif update_rule == 'rmsprop':
        updates = lasagne.updates.rmsprop(loss, params, self.lr, self.rho,
                                          self.rms_epsilon)
    else:
        updates = deepmind_rmsprop(loss, params, self.lr, self.rho,
                                   self.rms_epsilon)

    if self.momentum > 0:
        updates = lasagne.updates.apply_momentum(updates, None,
                                                 self.momentum)

    self._train = theano.function([], [loss], updates=updates,
                                  givens=train_givens)

    # NOTE(review): only `states` is substituted here; if q_vals depends on
    # the `actions` input, compiling this function needs a value for it.
    single_state = self.state_shared.reshape(
        (1, self.num_frames*2, self.input_height, self.input_width))
    q_givens = {states: single_state}
    self._q_vals = theano.function([], q_vals[0], givens=q_givens)
def __init__(self, input_width, input_height, num_actions, num_frames,
             discount, learning_rate, rho, rms_epsilon, momentum,
             clip_delta, freeze_interval, batch_size, network_type,
             update_rule, batch_accumulator, rng, action_selection,
             input_scale=255.0):
    """Build the Q-network with a reconstruction (compression) head.

    Stores the hyper-parameters, selects the action-selection method,
    builds the network via ``self.build_network`` (and a frozen copy when
    ``freeze_interval > 0``), allocates shared minibatch buffers and
    compiles ``self._train``, ``self._q_vals`` and
    ``self._compression_loss``.

    Raises
    ------
    ValueError
        If ``action_selection``, ``batch_accumulator`` or ``update_rule``
        is not one of the recognised names.
    """
    self.input_width = input_width
    self.input_height = input_height
    self.num_actions = num_actions
    self.num_frames = num_frames
    self.batch_size = batch_size
    self.discount = discount
    self.rho = rho
    self.lr = learning_rate
    self.rms_epsilon = rms_epsilon
    self.momentum = momentum
    self.clip_delta = clip_delta
    self.freeze_interval = freeze_interval
    self.rng = rng
    self.max_compression_loss = 0

    if action_selection == 'epsilon-greedy':
        chooser = self.choose_action_epsilon_greedy
    elif action_selection == 'softmax':
        chooser = self.choose_action_softmax
    else:
        raise ValueError(
            "Unrecognized action selection: {}".format(action_selection))
    self.choose_action = chooser

    # Seed Lasagne before any layer is created.
    lasagne.random.set_rng(self.rng)

    self.update_counter = 0

    net_args = (network_type, input_width, input_height, num_actions,
                num_frames, batch_size)
    self.l_out, self.original, self.reconstructed = \
        self.build_network(*net_args)
    if self.freeze_interval > 0:
        # Separate network for the bootstrap target; only its Q output
        # is kept.
        self.next_l_out, _, _ = self.build_network(*net_args)
        self.reset_q_hat()

    states = T.tensor4('states')
    next_states = T.tensor4('next_states')
    rewards = T.col('rewards')
    actions = T.icol('actions')
    terminals = T.icol('terminals')

    float_x = theano.config.floatX
    frame_shape = (batch_size, num_frames, input_height, input_width)
    column_shape = (batch_size, 1)
    column_pattern = (False, True)

    self.states_shared = theano.shared(np.zeros(frame_shape, dtype=float_x))
    self.next_states_shared = theano.shared(
        np.zeros(frame_shape, dtype=float_x))
    self.rewards_shared = theano.shared(
        np.zeros(column_shape, dtype=float_x), broadcastable=column_pattern)
    self.actions_shared = theano.shared(
        np.zeros(column_shape, dtype='int32'), broadcastable=column_pattern)
    self.terminals_shared = theano.shared(
        np.zeros(column_shape, dtype='int32'), broadcastable=column_pattern)

    scaled_states = states / input_scale
    q_vals = lasagne.layers.get_output(self.l_out, scaled_states)

    if self.freeze_interval > 0:
        next_q_vals = lasagne.layers.get_output(self.next_l_out,
                                                next_states / input_scale)
    else:
        next_q_vals = lasagne.layers.get_output(self.l_out,
                                                next_states / input_scale)
        # Same network on both sides: stop gradients through the target.
        next_q_vals = theano.gradient.disconnected_grad(next_q_vals)

    original_vals = lasagne.layers.get_output(self.original,
                                              states / input_scale)
    reconstructed_vals = lasagne.layers.get_output(self.reconstructed,
                                                   states / input_scale)
    compression_loss = T.mean(
        0.5 * (original_vals - reconstructed_vals)**2)

    # The reconstruction error is added to the reward inside the target.
    target = (rewards + compression_loss +
              (T.ones_like(terminals) - terminals) *
              self.discount * T.max(next_q_vals, axis=1, keepdims=True))
    chosen_q = q_vals[T.arange(batch_size),
                      actions.reshape((-1, ))].reshape((-1, 1))
    diff = target - chosen_q

    if self.clip_delta > 0:
        # Squaring a clipped diff would give a zero gradient beyond the
        # clip bounds; instead the loss is continued linearly past the
        # clip point so the gradient stays constant there. This equals
        # declaring d loss/d q_vals to be the clipped diff, which is what
        # the DeepMind implementation does.
        abs_diff = abs(diff)
        quadratic_part = T.minimum(abs_diff, self.clip_delta)
        linear_part = abs_diff - quadratic_part
        loss = 0.5 * quadratic_part**2 + self.clip_delta * linear_part
    else:
        loss = 0.5 * diff**2

    if batch_accumulator not in ('sum', 'mean'):
        raise ValueError("Bad accumulator: {}".format(batch_accumulator))
    accumulate = T.sum if batch_accumulator == 'sum' else T.mean
    loss = accumulate(loss)

    params = lasagne.layers.helper.get_all_params(self.l_out)
    compression_params = lasagne.layers.helper.get_all_params(
        self.reconstructed)
    givens = {
        states: self.states_shared,
        next_states: self.next_states_shared,
        rewards: self.rewards_shared,
        actions: self.actions_shared,
        terminals: self.terminals_shared
    }

    if update_rule not in ('deepmind_rmsprop', 'rmsprop', 'sgd'):
        raise ValueError("Unrecognized update: {}".format(update_rule))
    if update_rule == 'sgd':
        updates = lasagne.updates.sgd(loss, params, self.lr)
    elif update_rule == 'rmsprop':
        updates = lasagne.updates.rmsprop(loss, params, self.lr, self.rho,
                                          self.rms_epsilon)
    else:
        updates = deepmind_rmsprop(loss, params, self.lr, self.rho,
                                   self.rms_epsilon)

    if self.momentum > 0:
        updates = lasagne.updates.apply_momentum(updates, None,
                                                 self.momentum)

    compression_updates = lasagne.updates.rmsprop(
        compression_loss, compression_params, self.lr, self.rho,
        self.rms_epsilon)
    # NOTE(review): Lasagne update rules are documented to return an
    # OrderedDict, which does not support `+=`; confirm this merge works
    # with the Lasagne version in use, and decide what should happen for
    # parameters shared by both losses.
    updates += compression_updates

    self._train = theano.function([], [loss, compression_loss, q_vals],
                                  updates=updates, givens=givens)
    self._q_vals = theano.function([], q_vals,
                                   givens={states: self.states_shared})
    self._compression_loss = theano.function(
        [], compression_loss, givens={states: self.states_shared})
def __init__(self, **option):
    """Build the training and decoding graphs of the two-pass translation model.

    Reads its configuration from ``option`` (keys used here: ``embdim``,
    ``hidden``, ``maxhid``, ``maxpart``, ``deephid``, ``vocabulary``,
    ``method``, ``eosid``, ``softk``, ``lambda``, ``decoder`` and the
    optional ``scope``, ``initializer``, ``regularizer``, ``keep_prob``).
    Missing optional keys are written back into ``option`` with defaults.

    The training graph combines the soft decoder's cost and the mean of the
    main decoder's cost as ``lamb * soft_cost + (1 - lamb) * true_cost``.
    The decoding graph is rebuilt under the same scope with ``reuse=True``
    and without dropout, and is compiled into ``self.encode`` and (for
    ``option["decoder"] == "GruCond"``) ``self.predict``.

    NOTE(review): the nesting of the variable scopes below was reconstructed
    from a whitespace-collapsed source -- confirm against the original file.

    Raises
    ------
    ValueError
        If ``option["decoder"] == "GruSimple"``.
    """
    # source and target embedding dim
    sedim, tedim = option["embdim"]
    # source, target and attention hidden dim
    shdim, thdim, ahdim = option["hidden"]
    # maxout hidden dim (read but not used further in this constructor)
    maxdim = option["maxhid"]
    # maxout part (read but not used further in this constructor)
    maxpart = option["maxpart"]
    # deepout hidden dim
    deephid = option["deephid"]
    svocab, tvocab = option["vocabulary"]
    sw2id, sid2w = svocab
    tw2id, tid2w = tvocab
    # source and target vocabulary size
    svsize, tvsize = len(sid2w), len(tid2w)

    # Fill in defaults for the optional settings.
    if "scope" not in option or option["scope"] is None:
        option["scope"] = "proteinsearch"

    if "initializer" not in option:
        option["initializer"] = None

    if "regularizer" not in option:
        option["regularizer"] = None

    if "keep_prob" not in option:
        option["keep_prob"] = 1.0

    dtype = theano.config.floatX
    initializer = option["initializer"]
    regularizer = option["regularizer"]
    # A falsy keep_prob (None or 0) is treated as "no dropout".
    keep_prob = option["keep_prob"] or 1.0

    scope = option["scope"]
    decoder_scope = "decoder2"

    encoder = Encoder(sedim, shdim)
    from . import decoder2
    decoder = decoder2.DecoderGruCond(2, option['method'], tedim, thdim,
                                      ahdim, 2 * shdim + thdim,
                                      dim_readout=deephid,
                                      n_y_vocab=tvsize)

    # training graph
    with ops.variable_scope(scope, initializer=initializer,
                            regularizer=regularizer, dtype=dtype):
        src_seq = T.imatrix("source_sequence")
        src_mask = T.matrix("source_sequence_mask")
        tgt_seq = T.imatrix("target_sequence")
        tgt_mask = T.matrix("target_sequence_mask")
        byseq = T.imatrix("backward_target_sequence")

        with ops.variable_scope("source_embedding"):
            source_embedding = ops.get_variable("embedding",
                                                [svsize, sedim])
            source_bias = ops.get_variable("bias", [sedim])

        with ops.variable_scope("target_embedding"):
            target_embedding = ops.get_variable("embedding",
                                                [tvsize, tedim])
            target_bias = ops.get_variable("bias", [tedim])

        source_inputs = nn.embedding_lookup(source_embedding, src_seq) + source_bias
        target_inputs = nn.embedding_lookup(target_embedding, tgt_seq) + target_bias
        # byseq shares the target embedding table and bias.
        by_inputs = nn.embedding_lookup(target_embedding, byseq) + target_bias

        if keep_prob < 1.0:
            source_inputs = nn.dropout(source_inputs, keep_prob=keep_prob)
            target_inputs = nn.dropout(target_inputs, keep_prob=keep_prob)
            by_inputs = nn.dropout(by_inputs, keep_prob=keep_prob)

        states, r_states = encoder.forward(source_inputs, src_mask)
        # Concatenate the two encoder state sequences along axis 2.
        annotation = T.concatenate([states, r_states], 2)

        annotation = nn.dropout(annotation, keep_prob=keep_prob)

        from . import softdec
        soft_decoder = softdec.SoftDecoder(option["eosid"], option["softk"],
                                           tedim, thdim, ahdim, 2 * shdim,
                                           dim_readout=deephid,
                                           n_y_vocab=tvsize)
        # First pass: the soft decoder is run in inference mode to produce
        # soft_states / soft_mask, which the main decoder attends to below.
        with ops.variable_scope('soft_decoder'):
            initial_state = nn.feedforward(states[-1], [shdim, thdim], True,
                                           scope='initial',
                                           activation=T.tanh)
            mapped_keys = map_key(annotation, 2 * shdim, ahdim)
            soft_states, _, _, soft_mask = soft_decoder.infer(
                mapped_keys, src_mask, annotation, initial_state,
                target_embedding, target_bias, keep_prob)

        # Same variables (reuse=True), now run on byseq to obtain soft_cost.
        with ops.variable_scope('soft_decoder', reuse=True):
            _, _, soft_cost, _ = soft_decoder.forward(
                byseq, by_inputs, tgt_mask, mapped_keys, src_mask,
                annotation, initial_state, keep_prob)

        # compute initial state for decoder
        # first state of backward encoder
        # initialize with only encoder state
        final_state = r_states[0]

        with ops.variable_scope(decoder_scope):
            initial_state = nn.feedforward(final_state, [shdim, thdim],
                                           True, scope="initial",
                                           activation=T.tanh)
            # keys for query
            with ops.variable_scope('map-key-src'):
                mapped_keys_src = map_key(annotation, 2 * shdim, ahdim)
            with ops.variable_scope('map-key-soft'):
                mapped_keys_soft = map_key(soft_states, thdim, ahdim)

            # Second pass: attends to both the source annotation and the
            # soft decoder states.
            _, _, _, snt_cost = decoder.forward(
                tgt_seq, target_inputs, tgt_mask,
                [mapped_keys_src, mapped_keys_soft],
                [src_mask, soft_mask],
                [annotation, soft_states],
                initial_state, keep_prob)

        ce = snt_cost
        true_cost = T.mean(ce)
        # Interpolation weight between the two costs, held in a shared
        # variable named 'lambda'.
        lamb = theano.shared(numpy.asarray(option['lambda'], dtype), 'lambda')
        cost = lamb * soft_cost + (1 - lamb) * true_cost

    # import utils.ttensor
    # print('true_cost %d:' % len(utils.ttensor.find_inputs_and_params(true_cost)[0]))
    # for xxx in utils.ttensor.find_inputs_and_params(true_cost)[0]:
    #     print('\t', xxx)
    # print('soft_cost %d:' % len(utils.ttensor.find_inputs_and_params(soft_cost)[0]))
    # for xxx in utils.ttensor.find_inputs_and_params(soft_cost)[0]:
    #     print('\t', xxx)
    # print('tot_cost: %d' % len(utils.ttensor.find_inputs_and_params(cost)[0]))
    # for xxx in utils.ttensor.find_inputs_and_params(cost)[0]:
    #     print('\t', xxx)
    # print('snt_cost: %d' % len(utils.ttensor.find_inputs_and_params(snt_cost)[0]))
    # for xxx in utils.ttensor.find_inputs_and_params(snt_cost)[0]:
    #     print('\t', xxx)

    training_inputs = [src_seq, src_mask, tgt_seq, tgt_mask, byseq]
    training_outputs = [cost, soft_cost, true_cost]

    # get_snt_cost = theano.function(training_inputs[:4], snt_cost)
    get_snt_cost = None

    # decoding graph
    with ops.variable_scope(scope, reuse=True):
        prev_words = T.ivector("prev_words")

        # disable dropout
        source_inputs = nn.embedding_lookup(source_embedding, src_seq)
        source_inputs = source_inputs + source_bias
        target_inputs = nn.embedding_lookup(target_embedding, tgt_seq)
        target_inputs = target_inputs + target_bias

        states, r_states = encoder.forward(source_inputs, src_mask)
        annotation = T.concatenate([states, r_states], 2)

        with ops.variable_scope('soft_decoder'):
            initial_state = nn.feedforward(states[-1], [shdim, thdim], True,
                                           scope='initial',
                                           activation=T.tanh)
            mapped_keys = map_key(annotation, 2 * shdim, ahdim)
            # keep_prob is fixed to 1.0 here (no dropout while decoding).
            soft_states, soft_contexts, soft_probs, soft_mask = soft_decoder.infer(
                mapped_keys, src_mask, annotation, initial_state,
                target_embedding, target_bias, 1.0)

        # decoder
        final_state = r_states[0]
        with ops.variable_scope(decoder_scope):
            initial_state = nn.feedforward(final_state, [shdim, thdim],
                                           True, scope="initial",
                                           activation=T.tanh)
            # keys for query
            with ops.variable_scope('map-key-src'):
                mapped_keys_src = map_key(annotation, 2 * shdim, ahdim)
            with ops.variable_scope('map-key-soft'):
                mapped_keys_soft = map_key(soft_states, thdim, ahdim)

        prev_inputs = nn.embedding_lookup(target_embedding, prev_words)
        prev_inputs = prev_inputs + target_bias

        cond = T.neq(prev_words, 0)
        # zeros out embedding if y is 0, which indicates <s>
        prev_inputs = prev_inputs * cond[:, None]

        with ops.variable_scope(decoder_scope):
            # Every entry of the single decoding step is unmasked.
            mask = T.ones_like(prev_words, dtype=dtype)
            next_state, context = decoder.step(
                prev_inputs, mask, initial_state,
                *[
                    mapped_keys_src, mapped_keys_soft, annotation,
                    soft_states, src_mask, soft_mask
                ])
            probs = decoder.prediction(prev_inputs, next_state, context)

    # encoding
    encoding_inputs = [src_seq, src_mask]
    encoding_outputs = [
        initial_state, annotation, soft_states, mapped_keys_src,
        mapped_keys_soft, soft_mask
    ]
    encode = theano.function(encoding_inputs, encoding_outputs)

    if option["decoder"] == "GruSimple":
        raise ValueError()
        # NOTE(review): everything below in this branch is unreachable
        # because of the unconditional raise above.
        prediction_inputs = [
            prev_words, initial_state, annotation, mapped_keys, src_mask
        ]
        prediction_outputs = [probs, context]
        predict = theano.function(prediction_inputs, prediction_outputs)

        generation_inputs = [prev_words, initial_state, context]
        generation_outputs = next_state
        generate = theano.function(generation_inputs, generation_outputs)

        self.predict = predict
        self.generate = generate
    elif option["decoder"] == "GruCond":
        prediction_inputs = [
            prev_words, initial_state, annotation, mapped_keys_src,
            src_mask, soft_states, mapped_keys_soft, soft_mask
        ]
        prediction_outputs = [probs, next_state]
        predict = theano.function(prediction_inputs, prediction_outputs)
        self.predict = predict
    # NOTE(review): for any other option["decoder"] value self.predict is
    # never assigned by this constructor.

    self.cost = cost
    self.inputs = training_inputs
    self.outputs = training_outputs
    self.updates = []
    self.align = None
    self.sample = None
    self.encode = encode

    self.get_snt_cost = get_snt_cost
    self.option = option
def theano_expr(self, targets, mode='stack', sparse=False):
    """
    Return the one-hot transformation as a symbolic expression.

    Parameters
    ----------
    targets : tensor_like, 1- or 2-dimensional, integer dtype
        A symbolic tensor representing labels as integers
        between 0 and `max_labels` - 1, `max_labels` supplied
        at formatter construction.
    mode : string
        The way in which to convert the labels to arrays. Takes
        three different options:

          - "concatenate" : concatenates the one-hot vectors from
            multiple labels
          - "stack" : returns a matrix where each row is the
            one-hot vector of a label
          - "merge" : merges the one-hot vectors together to
            form a vector where the elements are
            the result of an indicator function
            NB: As the result of an indicator function
            the result is the same in case a label
            is duplicated in the input.
    sparse : bool
        If true then the return value is sparse matrix. Note that if
        sparse is True, then mode cannot be 'stack' because sparse
        matrices need to be 2D

    Returns
    -------
    one_hot : TensorVariable, 1, 2 or 3-dimensional, sparse or dense
        A symbolic tensor representing a one-hot encoding of the
        supplied labels.

    Raises
    ------
    ValueError
        If `mode` is not one of the three options, if `mode` is 'stack'
        together with `sparse`, or if `targets` is not 1- or 2-dimensional.
    TypeError
        If `targets` does not have an integer dtype.

    Notes
    -----
    The dense branches assign the value 1 with `set_subtensor`, so a label
    that appears more than once is not accumulated there.
    """
    if mode not in ('concatenate', 'stack', 'merge'):
        # Report the offending `mode` itself (not the label count).
        raise ValueError("%s got bad mode argument '%s'" %
                         (self.__class__.__name__, str(mode)))
    elif mode == 'stack' and sparse:
        raise ValueError("Sparse matrices need to be 2D, hence they "
                         "cannot be stacked")

    # 1-D input is promoted to a single-row matrix and squeezed back at
    # the end.
    squeeze_required = False
    if targets.ndim != 2:
        if targets.ndim == 1:
            squeeze_required = True
            targets = targets.dimshuffle('x', 0)
        else:
            raise ValueError("targets tensor must be 1 or 2-dimensional")
    if 'int' not in str(targets.dtype):
        raise TypeError("need an integer tensor for targets")

    if sparse:
        if mode == 'concatenate':
            # Each label is shifted into its own block of `_max_labels`
            # columns within the row.
            one_hot = theano.sparse.CSR(
                tensor.ones_like(targets, dtype=self._dtype).flatten(),
                (targets.flatten() + tensor.arange(targets.size) *
                 self._max_labels) % (self._max_labels * targets.shape[1]),
                tensor.arange(targets.shape[0] + 1) * targets.shape[1],
                tensor.stack(targets.shape[0],
                             self._max_labels * targets.shape[1]))
        else:
            one_hot = theano.sparse.CSR(
                tensor.ones_like(targets, dtype=self._dtype).flatten(),
                targets.flatten(),
                tensor.arange(targets.shape[0] + 1) * targets.shape[1],
                tensor.stack(targets.shape[0], self._max_labels))
    else:
        if mode == 'concatenate':
            one_hot = tensor.zeros(
                (targets.shape[0] * targets.shape[1], self._max_labels))
            one_hot = tensor.set_subtensor(
                one_hot[tensor.arange(targets.size), targets.flatten()], 1)
            one_hot = one_hot.reshape(
                (targets.shape[0], targets.shape[1] * self._max_labels))
        elif mode == 'merge':
            one_hot = tensor.zeros((targets.shape[0], self._max_labels))
            one_hot = tensor.set_subtensor(
                one_hot[tensor.arange(targets.size) % targets.shape[0],
                        targets.T.flatten()], 1)
        else:
            one_hot = tensor.zeros(
                (targets.shape[0], targets.shape[1], self._max_labels))
            one_hot = tensor.set_subtensor(
                one_hot[tensor.arange(targets.shape[0]).reshape(
                    (targets.shape[0], 1)),
                        tensor.arange(targets.shape[1]),
                        targets], 1)

    if squeeze_required:
        # Drop the leading axis that was added for 1-D input.
        if one_hot.ndim == 2:
            one_hot = one_hot.reshape((one_hot.shape[1], ))
        if one_hot.ndim == 3:
            one_hot = one_hot.reshape(
                (one_hot.shape[1], one_hot.shape[2]))
    return one_hot