def build_theano_models(self, algo, algo_params): epsilon = 1e-6 kl = lambda mu, sig: sig+mu**2-TT.log(sig) X, y = TT.dmatrices('X', 'y') params = TT.dvector('params') a, b, c, l_F, F, l_FC, FC = self.unpack_params(params) sig2_n, sig_f = TT.exp(2*a), TT.exp(b) l_FF = TT.dot(X, l_F)+l_FC FF = TT.concatenate((l_FF, TT.dot(X, F)+FC), 1) Phi = TT.concatenate((TT.cos(FF), TT.sin(FF)), 1) Phi = sig_f*TT.sqrt(2./self.M)*Phi noise = TT.log(1+TT.exp(c)) PhiTPhi = TT.dot(Phi.T, Phi) A = PhiTPhi+(sig2_n+epsilon)*TT.identity_like(PhiTPhi) L = Tlin.cholesky(A) Li = Tlin.matrix_inverse(L) PhiTy = Phi.T.dot(y) beta = TT.dot(Li, PhiTy) alpha = TT.dot(Li.T, beta) mu_f = TT.dot(Phi, alpha) var_f = (TT.dot(Phi, Li.T)**2).sum(1)[:, None] dsp = noise*(var_f+1) mu_l = TT.sum(TT.mean(l_F, axis=1)) sig_l = TT.sum(TT.std(l_F, axis=1)) mu_w = TT.sum(TT.mean(F, axis=1)) sig_w = TT.sum(TT.std(F, axis=1)) hermgauss = np.polynomial.hermite.hermgauss(30) herm_x = Ts(hermgauss[0])[None, None, :] herm_w = Ts(hermgauss[1]/np.sqrt(np.pi))[None, None, :] herm_f = TT.sqrt(2*var_f[:, :, None])*herm_x+mu_f[:, :, None] nlk = (0.5*herm_f**2.-y[:, :, None]*herm_f)/dsp[:, :, None]+0.5*( TT.log(2*np.pi*dsp[:, :, None])+y[:, :, None]**2/dsp[:, :, None]) enll = herm_w*nlk nlml = 2*TT.log(TT.diagonal(L)).sum()+2*enll.sum()+1./sig2_n*( (y**2).sum()-(beta**2).sum())+2*(X.shape[0]-self.M)*a penelty = (kl(mu_w, sig_w)*self.M+kl(mu_l, sig_l)*self.S)/(self.S+self.M) cost = (nlml+penelty)/X.shape[0] grads = TT.grad(cost, params) updates = getattr(OPT, algo)(self.params, grads, **algo_params) updates = getattr(OPT, 'apply_nesterov_momentum')(updates, momentum=0.9) train_inputs = [X, y] train_outputs = [cost, alpha, Li] self.train_func = Tf(train_inputs, train_outputs, givens=[(params, self.params)]) self.train_iter_func = Tf(train_inputs, train_outputs, givens=[(params, self.params)], updates=updates) Xs, Li, alpha = TT.dmatrices('Xs', 'Li', 'alpha') l_FFs = TT.dot(Xs, l_F)+l_FC FFs = TT.concatenate((l_FFs, TT.dot(Xs, F)+FC), 1) Phis = TT.concatenate((TT.cos(FFs), TT.sin(FFs)), 1) Phis = sig_f*TT.sqrt(2./self.M)*Phis mu_pred = TT.dot(Phis, alpha) std_pred = (noise*(1+(TT.dot(Phis, Li.T)**2).sum(1)))**0.5 pred_inputs = [Xs, alpha, Li] pred_outputs = [mu_pred, std_pred] self.pred_func = Tf(pred_inputs, pred_outputs, givens=[(params, self.params)])
def build_ann(self, weights, biases, layer_sizes=[784, 400, 10],
              activation=[Tann.sigmoid, Tann.sigmoid, Tann.sigmoid]):
    """
    Builds a neural network with topology from layer_sizes.
    :parameter weights is the list of initial weight matrices, one per layer
    :parameter biases is the list of initial bias vectors, one per layer
    :parameter activation is the list of activation functions, one per layer
    """
    params = []
    inputs, answers = T.dmatrices('input', 'answers')
    assert len(layer_sizes) >= 2
    # Builds the layers
    for i in range(len(layer_sizes) - 1):
        layer = HiddenLayer(inputs, layer_sizes[i], layer_sizes[i + 1], weights[i], biases[i],
                            activation=activation[i])
        params.append(layer.W)
        params.append(layer.b)
        self.layers.append(layer)
    # Sets up the activation functions through the network
    layer = self.layers[0]
    previous_out = layer.activation(T.dot(layer.input, layer.W) + layer.b)
    x_h_out = layer.activation(T.dot(layer.input, layer.W) + layer.b)
    for i in range(len(self.layers) - 1):
        layer = self.layers[i + 1]
        x_h_out = layer.activation(T.dot(previous_out, layer.W) + layer.b)
        previous_out = x_h_out
    self.predictor = theano.function([inputs], [x_h_out])  # Activate
def build_ann(self, layer_sizes=[784, 24, 10], activation=Tann.sigmoid,
              rand_limit_min=-.1, rand_limit_max=.1):
    """
    Builds a neural network with topology from the layer_sizes.
    :parameter activation is the activation function for the network
    :parameter rand_limit_min is the minimum limit for random initialization of weights for all layers
    :parameter rand_limit_max is the maximum limit for random initialization of weights for all layers
    """
    params = []
    inputs, answers = T.dmatrices('input', 'answers')
    assert len(layer_sizes) >= 2
    for i in range(len(layer_sizes) - 1):
        layer = HiddenLayer(inputs, layer_sizes[i], layer_sizes[i + 1], activation=activation,
                            rand_limit_min=rand_limit_min, rand_limit_max=rand_limit_max)
        # outputs.append(layer.output)
        params.append(layer.W)
        params.append(layer.b)
        self.layers.append(layer)
    previous_out = self.layers[0].output
    x_h_out = self.layers[0].output
    for i in range(len(self.layers) - 1):
        layer = self.layers[i + 1]
        x_h_out = Tann.sigmoid(T.dot(previous_out, layer.W) + layer.b)
        previous_out = x_h_out
    error = T.sum((answers - x_h_out) ** 2)
    gradients = T.grad(error, params)
    backprop_acts = [(p, p - self.lrate * g) for p, g in zip(params, gradients)]
    self.predictor = theano.function([inputs], [x_h_out])
    self.trainer = theano.function([inputs, answers], error, updates=backprop_acts)
def createObjectiveFunction(self):
    '''
    @description: initialize objective function and minimization function
    @X,y data matrix/vector
    @u random noise for simulator
    @v standard normal for reparametrization trick
    '''
    X, u = T.dmatrices("X", "u")
    f, y, v = T.dcols("f", "y", "v")
    mu = self.params[0]
    logSigma = self.params[1]
    logLambda = sharedX(np.log(self.sigma_e), name='logLambda')
    #logLambda = self.params[2]

    negKL = 0.5 * self.dimTheta + 0.5 * T.sum(2 * logSigma - mu**2 - T.exp(logSigma)**2)
    f = self.regression_simulator(X, u, v, mu, logSigma)

    logLike = -self.m * (0.5 * np.log(2 * np.pi) + logLambda) - 0.5 * T.sum((y - f)**2) / (T.exp(logLambda)**2) / self.Lu

    elbo = (negKL + logLike)
    obj = -elbo
    self.lowerboundfunction = th.function([X, y, u, v], obj, on_unused_input='ignore')
    derivatives = T.grad(obj, self.params)
    self.gradientfunction = th.function([X, y, u, v], derivatives, on_unused_input='ignore')
def compute_more_than_one():
    a, b = T.dmatrices('a', 'b')
    diff = a - b
    abs_diff = abs(diff)
    diff_sq = diff**2
    f = theano.function([a, b], [diff, abs_diff, diff_sq])
    print f([[0, 0], [1, 2]], [[2, 3], [4, 1]])
def compile_theano_functions():
    """
    Returns compiled theano functions.

    Notes
    -----
    Originally used to speedup multiplication of large matrices and vectors.
    Caused strange issue in nipype where nipype unnecessarily reran nodes that
    use these compiled functions. Not used in current implementation.
    """
    import theano.tensor as T
    import theano

    def TnormCols(X):
        """
        Theano expression which centers and normalizes columns of X `||x_i|| = 1`
        """
        Xc = X - X.mean(0)
        return Xc / T.sqrt((Xc**2.).sum(0))

    def TzscorrCols(Xn):
        """
        Theano expression which returns Fisher transformed correlation values between
        columns of a normalized input, `X_n`. Diagonal is set to zero.
        """
        C_X = T.dot(Xn.T, Xn) - T.eye(Xn.shape[1])
        return 0.5 * T.log((1 + C_X) / (1 - C_X))

    X, Y = T.dmatrices('X', 'Y')
    tdot = theano.function([X, Y], T.dot(X, Y))
    tnormcols = theano.function([X], TnormCols(X))
    return tdot, tnormcols
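# Minimal usage sketch (an assumption, not part of the original module): the
# compiled helpers operate directly on NumPy arrays; tnormcols centres each
# column of X and scales it to unit norm.
import numpy as np

tdot, tnormcols = compile_theano_functions()
X_data = np.random.randn(100, 5)
Y_data = np.random.randn(5, 3)
product = tdot(X_data, Y_data)      # equivalent to X_data.dot(Y_data)
X_normed = tnormcols(X_data)        # zero-mean, unit-norm columns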
def multipleThingAtTheSameTime(a, b):
    x, y = T.dmatrices('x', 'y')
    diff = x - y
    abs_diff = abs(diff)
    diff_squared = diff**2
    summ = x + y
    f = th.function([x, y], [diff, abs_diff, diff_squared, summ])
    print(f(a, b))
def createGradientFunctions(self): #Create the Theano variables W1,W2,W3,W4,W5,W6,x,eps = T.dmatrices("W1","W2","W3","W4","W5","W6","x","eps") #Create biases as cols so they can be broadcasted for minibatches b1,b2,b3,b4,b5,b6 = T.dcols("b1","b2","b3","b4","b5","b6") z1 = T.col("z1") if self.continuous: #convolve x # no_filters = 100, stride = 4, filter_size = 50 h_encoder = T.tanh(T.dot(W1,x) + b1) #h_encoder = T.dot(W1,x) + b1 else: h_encoder = T.tanh(T.dot(W1,x) + b1) mu_encoder = T.dot(W2,h_encoder) + b2 log_sigma_encoder = 0.5*(T.dot(W3,h_encoder) + b3) mu_encoder = T.dot(W2,h_encoder) + b2 log_sigma_encoder = 0.5*(T.dot(W3,h_encoder) + b3) #Find the hidden variable z z = mu_encoder + T.exp(log_sigma_encoder)*eps prior = 0.5* T.sum(1 + 2*log_sigma_encoder - mu_encoder**2 - T.exp(2*log_sigma_encoder)) #Set up decoding layer if self.continuous: h_decoder = T.nnet.softplus(T.dot(W4,z) + b4) h_dec = T.nnet.softplus(T.dot(W4,z1) + b4) #h_decoder = T.dot(W4,z) + b4 #h_dec = T.dot(W4,z1) + b4 mu_decoder = T.tanh(T.dot(W5,h_decoder) + b5) mu_dec = T.tanh(T.dot(W5,h_dec) + b5) log_sigma_decoder = 0.5*(T.dot(W6,h_decoder) + b6) logpxz = T.sum(-(0.5 * np.log(2 * np.pi) + log_sigma_decoder) - 0.5 * ((x - mu_decoder) / T.exp(log_sigma_decoder))**2) gradvariables = [W1,W2,W3,W4,W5,W6,b1,b2,b3,b4,b5,b6] else: h_decoder = T.tanh(T.dot(W4,z) + b4) y = T.nnet.sigmoid(T.dot(W5,h_decoder) + b5) logpxz = -T.nnet.binary_crossentropy(y,x).sum() gradvariables = [W1,W2,W3,W4,W5,b1,b2,b3,b4,b5] logp = logpxz + prior #Compute all the gradients derivatives = T.grad(logp,gradvariables) #Add the lowerbound so we can keep track of results derivatives.append(logp) self.get_z = th.function(gradvariables+[x,eps],z,on_unused_input='ignore') self.generate = th.function(gradvariables+[z1,x,eps],mu_dec,on_unused_input='ignore') self.predict = th.function(gradvariables+[x,eps],mu_decoder,on_unused_input='ignore') self.gradientfunction = th.function(gradvariables + [x,eps], derivatives, on_unused_input='ignore') self.lowerboundfunction = th.function(gradvariables + [x,eps], logp, on_unused_input='ignore')
def __init__(self, Q, D, layers, order, D_cum_sum, N, M, non_rec): try: print('Trying to load model...') with open('model_SV1.save', 'rb') as file_handle: self.f, self.g = pickle.load(file_handle) print('Loaded!') return except: print('Failed. Creating a new model...') print('Setting up variables...') hyp, SIGMA_S, U, b, MU_S = T.dmatrices('hyp', 'SIGMA_S', 'U', 'b','MU_S') y, MEAN_MAP, sn, sf = T.dvectors('y','MEAN_MAP','sn','sf') w = T.dscalars('w') if Q > 1: X = T.dmatrix('X') else: X = T.dvector('X') if layers > 1: MU, SIGMA = T.dmatrices('MU', 'SIGMA') else: MU, SIGMA = T.dvectors('MU', 'SIGMA') SIGMA_trf, SIGMA_S_trf = T.log(1+T.exp(SIGMA))**2, T.log(1+T.exp(SIGMA_S))**2 sf_trf, sn_trf, lengthscale_trf, lengthscale_p_trf = T.log(1 + T.exp(sf))**2, T.log(1 + T.exp(sn))**2, T.log(1 + T.exp(hyp[:,0])), T.log(1 + T.exp(hyp[:,1])) print('Setting up model...') LL, KL = self.get_model(w, lengthscale_trf, lengthscale_p_trf, sn_trf, sf_trf, MU_S, SIGMA_S_trf, MU, SIGMA_trf, U, b, X, y, MEAN_MAP, Q, D, D_cum_sum, layers, order, non_rec, N, M) print('Compiling model...') inputs = {'X': X, 'MU': MU, 'SIGMA': SIGMA, 'MU_S': MU_S, 'SIGMA_S': SIGMA_S, 'U': U, 'b': b, 'hyp': hyp, 'y': y, 'MEAN_MAP': MEAN_MAP, 'sn': sn, 'sf': sf, 'w': w} z = 0.0 * sum([T.sum(v) for v in inputs.values()]) # solve a bug with derivative wrt inputs not in the graph f = {'LL': LL, 'KL': KL} self.f = {fn: theano.function(list(inputs.values()), fv+z, name=fn, on_unused_input='ignore') for fn,fv in f.items()} g = {'LL': LL, 'KL': KL} wrt = {'MU': MU, 'SIGMA': SIGMA, 'MU_S': MU_S, 'SIGMA_S': SIGMA_S, 'U': U, 'b': b, 'hyp': hyp, 'MEAN_MAP': MEAN_MAP, 'sn': sn, 'sf': sf, 'w': w} self.g = {vn: {gn: theano.function(list(inputs.values()), T.grad(gv+z, vv), name='d'+gn+'_d'+vn, on_unused_input='ignore') for gn,gv in g.items()} for vn, vv in wrt.items()} with open('model_SV1.save', 'wb') as file_handle: print('Saving model...') sys.setrecursionlimit(100000) pickle.dump([self.f, self.g], file_handle, protocol=pickle.HIGHEST_PROTOCOL)
def multiple_input_output():
    # multiple input, multiple output
    a, b = T.dmatrices('a', 'b')  # plural
    diff = a - b
    abs_diff = abs(diff)
    diff_squared = diff**2
    f = function([a, b], [diff, abs_diff, diff_squared])
    print '\nmultiple input, multiple output'
    print f([[1, 1], [1, 1]], [[0, 1], [2, 3]])
def test_examples_3(self):
    a, b = T.dmatrices('a', 'b')
    diff = a - b
    abs_diff = abs(diff)
    diff_squared = diff**2
    f = function([a, b], [diff, abs_diff, diff_squared])
    elems = f([[1, 1], [1, 1]], [[0, 1], [2, 3]])
    assert numpy.all(elems[0] == array([[1., 0.], [-1., -2.]]))
    assert numpy.all(elems[1] == array([[1., 0.], [1., 2.]]))
    assert numpy.all(elems[2] == array([[1., 0.], [1., 4.]]))
def variables(self): # Define parameters 'w' v = {} v['w0x'], v['w0y'] = T.dmatrices('w0x', 'w0y') v['b0'] = T.dmatrix('b0') for i in range(1, len(self.n_hidden_q)): v['w' + str(i)] = T.dmatrix('w' + str(i)) v['b' + str(i)] = T.dmatrix('b' + str(i)) v['mean_w'] = T.dmatrix('mean_w') v['mean_b'] = T.dmatrix('mean_b') if self.type_qz in ['gaussian', 'gaussianmarg']: v['logvar_w'] = T.dmatrix('logvar_w') v['logvar_b'] = T.dmatrix('logvar_b') w = {} w['w0y'], w['w0z'] = T.dmatrices('w0y', 'w0z') w['b0'] = T.dmatrix('b0') for i in range(1, len(self.n_hidden_p)): w['w' + str(i)] = T.dmatrix('w' + str(i)) w['b' + str(i)] = T.dmatrix('b' + str(i)) w['out_w'] = T.dmatrix('out_w') w['out_b'] = T.dmatrix('out_b') if self.type_px == 'sigmoidgaussian' or self.type_px == 'gaussian': w['out_logvar_w'] = T.dmatrix('out_logvar_w') w['out_logvar_b'] = T.dmatrix('out_logvar_b') w['logpy'] = T.dmatrix('logpy') if self.type_pz == 'studentt': w['logv'] = T.dmatrix('logv') # Define latent variables 'z' z = {'eps': T.dmatrix('eps')} # Define observed variables 'x' x = {} x['x'] = T.dmatrix('x') x['y'] = T.dmatrix('y') return v, w, x, z
def createGradientFunctions(self): #Create the Theano variables W1, W2, W3, W4, W5, W6, x, eps = T.dmatrices("W1", "W2", "W3", "W4", "W5", "W6", "x", "eps") #Create biases as cols so they can be broadcasted for minibatches b1, b2, b3, b4, b5, b6 = T.dcols("b1", "b2", "b3", "b4", "b5", "b6") if self.continuous: h_encoder = T.nnet.softplus(T.dot(W1, x) + b1) else: h_encoder = T.tanh(T.dot(W1, x) + b1) mu_encoder = T.dot(W2, h_encoder) + b2 log_sigma_encoder = 0.5 * (T.dot(W3, h_encoder) + b3) #Find the hidden variable z z = mu_encoder + T.exp(log_sigma_encoder) * eps prior = 0.5 * T.sum(1 + 2 * log_sigma_encoder - mu_encoder**2 - T.exp(2 * log_sigma_encoder)) #Set up decoding layer if self.continuous: h_decoder = T.nnet.softplus(T.dot(W4, z) + b4) mu_decoder = T.nnet.sigmoid(T.dot(W5, h_decoder) + b5) log_sigma_decoder = 0.5 * (T.dot(W6, h_decoder) + b6) logpxz = T.sum(-(0.5 * np.log(2 * np.pi) + log_sigma_decoder) - 0.5 * ((x - mu_decoder) / T.exp(log_sigma_decoder))**2) gradvariables = [W1, W2, W3, W4, W5, W6, b1, b2, b3, b4, b5, b6] else: h_decoder = T.tanh(T.dot(W4, z) + b4) y = T.nnet.sigmoid(T.dot(W5, h_decoder) + b5) logpxz = -T.nnet.binary_crossentropy(y, x).sum() gradvariables = [W1, W2, W3, W4, W5, b1, b2, b3, b4, b5] logp = logpxz + prior #Compute all the gradients derivatives = T.grad(logp, gradvariables) #Add the lowerbound so we can keep track of results derivatives.append(logp) self.gradientfunction = th.function(gradvariables + [x, eps], derivatives, on_unused_input='ignore') self.lowerboundfunction = th.function(gradvariables + [x, eps], logp, on_unused_input='ignore') self.zfunction = th.function(gradvariables + [x, eps], z, on_unused_input='ignore')
def test_1_examples_compute_more_than_1_return_value():
    a, b = T.dmatrices('a', 'b')
    diff = a - b
    abs_diff = abs(diff)
    diff_squared = diff**2
    f = theano.function([a, b], [diff, abs_diff, diff_squared])
    diff_res, abs_res, diff_squared_res = f([[1, 1], [1, 1]], [[0, 0], [2, 2]])
    np.testing.assert_array_almost_equal(diff_res, [[1, 1], [-1, -1]])
    np.testing.assert_array_almost_equal(abs_res, [[1, 1], [1, 1]])
    np.testing.assert_array_almost_equal(diff_squared_res, [[1, 1], [1, 1]])
def setUp(self):
    test_in_1 = InputLayer((None, None))
    test_in_2 = InputLayer((None, None))
    self.l = CosineSimilarityLayer(test_in_1, test_in_2)
    in1, in2 = T.dmatrices('in1', 'in2')
    pred_out = layers.get_output(self.l, inputs={test_in_1: in1, test_in_2: in2})
    self.fn = theano.function([in1, in2], pred_out)
def createGradientFunctions(self):
    #create
    X = T.dmatrices("X")
    mu, logSigma, u, v, f, R = T.dcols("mu", "logSigma", "u", "v", "f", "R")
    mu = sharedX(np.random.normal(10, 10, (self.dimTheta, 1)), name='mu')
    logSigma = sharedX(np.random.uniform(0, 4, (self.dimTheta, 1)), name='logSigma')
    logLambd = sharedX(np.matrix(np.random.uniform(0, 10)), name='logLambd')
    logLambd = T.patternbroadcast(T.dmatrix("logLambd"), [1, 1])

    negKL = 0.5 * T.sum(1 + 2 * logSigma - mu**2 - T.exp(logSigma)**2)
    theta = mu + T.exp(logSigma) * v
    W = theta
    y = X[:, 0]
    X_sim = X[:, 1:]
    f = (T.dot(X_sim, W) + u).flatten()

    gradvariables = [mu, logSigma, logLambd]
    logLike = T.sum(-(0.5 * np.log(2 * np.pi) + logLambd) - 0.5 * ((y - f) / (T.exp(logLambd)))**2)
    logp = (negKL + logLike) / self.m
    optimizer = -logp

    self.negKL = th.function([mu, logSigma], negKL, on_unused_input='ignore')
    self.f = th.function(gradvariables + [X, u, v], f, on_unused_input='ignore')
    self.logLike = th.function(gradvariables + [X, u, v], logLike, on_unused_input='ignore')
    derivatives = T.grad(logp, gradvariables)
    derivatives.append(logp)

    self.gradientfunction = th.function(gradvariables + [X, u, v], derivatives, on_unused_input='ignore')
    self.lowerboundfunction = th.function(gradvariables + [X, u, v], logp, on_unused_input='ignore')
    self.optimizer = BatchGradientDescent(objective=optimizer, params=gradvariables,
                                          inputs=[X, u, v], conjugate=True, max_iter=1)
def createGradientFunctions(self): #Create the Theano variables W1,W2,W3,W4,W5,W6,x,eps = T.dmatrices("W1","W2","W3","W4","W5","W6","x","eps") #Create biases as cols so they can be broadcasted for minibatches b1,b2,b3,b4,b5,b6,pi = T.dcols("b1","b2","b3","b4","b5","b6","pi") if self.continuous: h_encoder = T.nnet.softplus(T.dot(W1,x) + b1) else: h_encoder = T.tanh(T.dot(W1,x) + b1) print type(pi) rng = T.shared_randomstreams.RandomStreams(seed=124) i = rng.choice(size=(1,), a=self.num_model, p=T.nnet.softmax(pi.T).T.flatten()) mu_encoder = T.dot(W2[i[0]*self.dimZ:(1+i[0])*self.dimZ],h_encoder) + b2[i[0]*self.dimZ:(1+i[0])*self.dimZ] log_sigma_encoder = (0.5*(T.dot(W3[i[0]*self.dimZ:(1+i[0])*self.dimZ],h_encoder)))+ b3[i[0]*self.dimZ:(1+i[0])*self.dimZ] z = mu_encoder + T.exp(log_sigma_encoder)*eps prior = 0 for i in range(self.num_model): prior += T.exp(pi[i][0])*0.5* T.sum(1 + 2*log_sigma_encoder[int(i)*self.dimZ:(1+int(i))*self.dimZ] - mu_encoder[int(i)*self.dimZ:(1+int(i))*self.dimZ]**2 - T.exp(2*log_sigma_encoder[int(i)*self.dimZ:(1+int(i))*self.dimZ])) prior /= T.sum(T.exp(pi)) #Set up decoding layer if self.continuous: h_decoder = T.nnet.softplus(T.dot(W4,z) + b4) mu_decoder = T.nnet.sigmoid(T.dot(W5,h_decoder) + b5) log_sigma_decoder = 0.5*(T.dot(W6,h_decoder) + b6) logpxz = T.sum(-(0.5 * np.log(2 * np.pi) + log_sigma_decoder) - 0.5 * ((x - mu_decoder) / T.exp(log_sigma_decoder))**2) gradvariables = [W1,W2,W3,W4,W5,W6,b1,b2,b3,b4,b5,b6,pi] else: h_decoder = T.tanh(T.dot(W4,z) + b4) y = T.nnet.sigmoid(T.dot(W5,h_decoder) + b5) logpxz = -T.nnet.binary_crossentropy(y,x).sum() gradvariables = [W1,W2,W3,W4,W5,b1,b2,b3,b4,b5,pi] logp = logpxz + prior #Compute all the gradients derivatives = T.grad(logp,gradvariables) #Add the lowerbound so we can keep track of results derivatives.append(logpxz) self.gradientfunction = th.function(gradvariables + [x,eps], derivatives, on_unused_input='ignore') self.lowerboundfunction = th.function(gradvariables + [x,eps], logp, on_unused_input='ignore') self.hiddenstatefunction = th.function(gradvariables + [x,eps], z, on_unused_input='ignore')
def examine(sample, expected, layers):
    layer1 = layers[0]
    layer2 = layers[1]
    layer3 = layers[2]
    samplesize = sample.shape[0]
    x, y = T.dmatrices('x', 'y')
    firstoutput = 1 / (1 + T.exp(-T.dot(x, layer1.weights) - layer1.bias))
    secondoutput = 1 / (1 + T.exp(-T.dot(firstoutput, layer2.weights) - layer2.bias))
    finaloutput = T.dot(secondoutput, layer3.weights) + layer3.bias
    err = ((finaloutput - y)**2).sum()
    f = theano.function([x, y], err)
    outcome = f(sample, expected)
    result = outcome / samplesize
    return result
def calc2elements():
    """
    Compute on two input elements at once.
    http://deeplearning.net/software/theano/tutorial/examples.html
    Takes two input matrices and returns, element-wise, their difference,
    its absolute value, and its square.
    """
    import theano.tensor as T
    from theano import pp

    a, b = T.dmatrices('a', 'b')
    diff = a - b
    abs_diff = abs(diff)
    diff_square = diff ** 2
    f = function([a, b], [diff, abs_diff, diff_square])
    diff, abs_diff, diff_square = f([[1, 1], [1, 1]], [[0, 1], [2, 3]])
    print (diff)
    print (abs_diff)
    print (diff_square)
def __init__(self, initial_params=None):
    print 'Setting up variables ...'
    # Parameters
    if initial_params is None:
        initial_params = {'mean': None,
                          'sigma_n': 0. + np_uniform_scalar(0),
                          'sigma_f': 0. + np_uniform_scalar(0),
                          'l_k': 0. + np_uniform_scalar(0)}
    if initial_params['mean'] is None:
        self.mean = shared_scalar(0.)
        self.meanfunc = 'zero'
    else:
        self.mean = shared_scalar(initial_params['mean'])
        self.meanfunc = 'const'
    self.sigma_n = shared_scalar(initial_params['sigma_n'])
    self.sigma_f = shared_scalar(initial_params['sigma_f'])
    self.l_k = shared_scalar(initial_params['l_k'])

    # Variables
    X, Y, x_test = T.dmatrices('X', 'Y', 'x_test')

    print 'Setting up model ...'
    K, Ks, Kss, y_test_mu, y_test_var, log_likelihood, L, alpha, V, fs2, sW = self.get_model(X, Y, x_test)

    print 'Compiling model ...'
    inputs = {'X': X, 'Y': Y, 'x_test': x_test}
    # solve a bug with derivative wrt inputs not in the graph
    z = 0.0 * sum([T.sum(v) for v in inputs.values()])
    f = zip(['K', 'Ks', 'Kss', 'y_test_mu', 'y_test_var', 'log_likelihood', 'L', 'alpha', 'V', 'fs2', 'sW'],
            [K, Ks, Kss, y_test_mu, y_test_var, log_likelihood, L, alpha, V, fs2, sW])
    self.f = {n: theano.function(inputs.values(), f + z, name=n, on_unused_input='ignore') for n, f in f}
    if self.meanfunc == 'zero':
        wrt = {'sigma_n': self.sigma_n, 'sigma_f': self.sigma_f, 'l_k': self.l_k}
    else:
        wrt = {'mean': self.mean, 'sigma_n': self.sigma_n, 'sigma_f': self.sigma_f, 'l_k': self.l_k}
    self.g = {vn: theano.function(inputs.values(), T.grad(log_likelihood, vv), name=vn, on_unused_input='ignore')
              for vn, vv in wrt.iteritems()}
def JacobiTimesVector():
    W, V = T.dmatrices(['W', 'V'])
    x = T.dvector('x')
    y = T.dot(x, W)
    JV = T.Rop(y, W, V)
    f = function([W, V, x], JV)
    print(f([[1, 1], [1, 1]], [[2, 2], [2, 2]], [0, 1]))

    v = T.dvector('v')
    VJ = T.Lop(y, W, v)
    fL = function([v, x], VJ)
    print(fL([2, 2], [0, 1]))
def main(): x = T.dmatrix('x') # T.exp s = 1 / (1 + T.exp(-x)) logistic = function([x], s) # 0 is 0.5, negative < 0.5... print(logistic([[0, 1], [-1, -2]])) # logistic function can be expressed with hyperbolic tan term s2 = (1 + T.tanh(x / 2)) / 2 logistic2 = function([x], s2) print( np.allclose(logistic([[0, 1], [-1, -2]]), logistic2([[0, 1], [-1, -2]]))) # do more things at a time a, b = T.dmatrices('a', 'b') diff = a - b abs_diff = abs(diff) diff_squared = diff**2 f = function([a, b], [diff, abs_diff, diff_squared]) print(f([[1, 1], [1, 1]], [[0, 1], [2, 3]])) # default value x, y = T.dscalars('x', 'y') z = x + y f = function([x, In(y, value=1)], z) print(f(33)) print(f(33, 2)) # Inputs with default values must follow inputs without default # values (like Python’s functions). There can be multiple inputs # with default values. These parameters can be set positionally # or by name, as in standard Python x, y, w = T.dscalars('x', 'y', 'w') z = (x + y) * w f = function([x, In(y, value=1), In(w, value=2, name='w_by_name')], z) print(f(33)) print(f(33, 2)) print(f(33, 0, 1)) print(f(33, w_by_name=1)) print(f(33, w_by_name=1, y=0))
def __init__(self, n_x, n_h, n_y, lr=0, nonlinear='softplus', valid_x=None, valid_y=None): print 'PL', n_x, n_h, n_y, lr, nonlinear if lr == 0: lr = 10. / n_h self.lr = lr self.fitted = False self.n_x = n_x self.n_h = n_h self.n_y = n_y self.nonlinear = nonlinear self.valid_x = valid_x self.valid_y = valid_y if self.nonlinear == 'softplus': def g(_x): return T.log(T.exp(_x) + 1) else: raise Exception() # Define Theano computational graph x, y, w1, b1, w2, b2, A = T.dmatrices('x', 'y', 'w1', 'b1', 'w2', 'b2', 'A') h1 = g(T.dot(w1, x) + T.dot(b1, A)) h2 = g(T.dot(w2, h1) + T.dot(b2, A)) p = T.nnet.softmax(h2.T).T logpy = (-T.nnet.categorical_crossentropy(p.T, y.T).T).reshape((1, -1)) dlogpy_dw = T.grad(logpy.sum(), [w1, b1, w2, b2]) H = T.nnet.categorical_crossentropy(p.T, p.T).T #entropy dH_dw = T.grad(H.sum(), [w1, b1, w2, b2]) # Define functions to call self.f_p = theano.function([x, w1, b1, w2, b2, A], p) self.f_dlogpy_dw = theano.function([x, y, w1, b1, w2, b2, A], [logpy] + dlogpy_dw) self.f_dH_dw = theano.function([x, w1, b1, w2, b2, A], [H] + dH_dw)
def _getModel():
    s1, s2 = T.dvectors('s1', 's2')
    t1, t2 = T.dmatrices('t1', 't2')
    gw = T.dvector('gw')
    prank = T.dvector('prank')
    r1 = T.dot(t1, prank)
    r2 = T.dot(t2, prank)
    erd = T.exp(r2 - r1)
    p = erd / (erd + 1)
    loglterms = gw * ((s1 * T.log(1 - p)) + (s2 * T.log(p)))
    logl = -T.sum(loglterms)
    gradf = T.grad(logl, prank)
    hessf = theano.gradient.hessian(logl, prank)
    return s1, s2, t1, t2, gw, prank, loglterms, logl, gradf, hessf
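# Minimal usage sketch (an assumption, not from the original source): the
# symbolic graph returned by _getModel can be compiled into callables for the
# negative log-likelihood, its gradient, and its Hessian w.r.t. prank.
import theano
import theano.tensor as T

s1, s2, t1, t2, gw, prank, loglterms, logl, gradf, hessf = _getModel()
f_logl = theano.function([s1, s2, t1, t2, gw, prank], logl)
f_grad = theano.function([s1, s2, t1, t2, gw, prank], gradf)
f_hess = theano.function([s1, s2, t1, t2, gw, prank], hessf)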
def createObjectiveFunction(self): ''' @escription: initialize objective function and minimization function @X,y data matrix/vector @u random noise for simulator @v standard normal for reparametrization trick ''' y = T.dmatrices("y") i = T.iscalar("i") v = T.dscalar("i") xStart = T.dvector("xStart") mu = self.params[0] #logSigma = sharedX(np.random.uniform(0, 1, (self.dimTheta, 1)), name='logSigma') logSigma = self.params[1] #logLambda = sharedX(np.random.uniform(0, 10), name='logLambda') logLambda = self.params[2] negKL = 0.5*self.dimTheta+0.5*T.sum(2*logSigma - mu ** 2 - T.exp(logSigma) ** 2) self.k = mu+T.exp(logSigma)*v V1 = T.dvector("V2") V2 = T.dvector("V2") results, updates = th.scan(fn=self.fisher_wright_normal_approx, outputs_info=[{'initial':xStart,'taps':[-1]}],sequences=[V1,V2], n_steps=i) f = results logLike = -self.m*(0.5 * np.log(2 * np.pi) + logLambda)-0.5*T.sum((y-f)**2)/(T.exp(logLambda)**2) part2 = f #0.5*T.sum((y-f)**2) #/(T.exp(logLambda)**2) elbo = (negKL + logLike) obj = -elbo test1 = y[0:self.i/4,:].sum(axis=0)/(self.i/4) test2 = y[self.i/4:self.i/2].sum(axis=0)/(self.i/4) self.test = th.function([xStart, i, y, v, V1, V2],test,on_unused_input='ignore') self.part2 = th.function([xStart, i, y, v, V1, V2], part2, updates=updates, on_unused_input='ignore') self.logLike = th.function([xStart, i, y, v, V1, V2], logLike, updates=updates, on_unused_input='ignore') self.lowerboundfunction = th.function([xStart, i, y, v, V1, V2], obj, updates=updates, on_unused_input='ignore') derivatives = T.grad(obj, self.params) self.gradientfunction = th.function([xStart, i, y, v, V1, V2], derivatives, updates=updates, on_unused_input='ignore')
def train(sample, expected, iteration, layers=None): ''' sample and expected are matrices where each rows are a single datum and the size of the rows are the size of the datum ''' vectorinlength = sample.shape[1] vectoroutlength = expected.shape[1] datasize = sample.shape[0] if layers is None: layer1 = layer(vectorinlength, 30) layer2 = layer(30, 10) layer3 = layer(10, vectoroutlength) else: layer1 = layers[0] layer2 = layers[1] layer3 = layers[2] x, y = T.dmatrices('x', 'y') firstoutput = T.tanh(T.dot(x, layer1.weights) + layer1.bias) secondoutput = T.tanh(T.dot(firstoutput, layer2.weights) + layer2.bias) finaloutput = T.tanh(T.dot(secondoutput, layer3.weights) + layer3.bias) err = ((finaloutput - y)**2).mean() f = theano.function( [x, y], err, updates=((layer1.weights, layer1.weights - 0.05 * T.grad(err, layer1.weights)), (layer1.bias, layer1.bias - 0.05 * T.grad(err, layer1.bias)), (layer2.weights, layer2.weights - 0.05 * T.grad(err, layer2.weights)), (layer2.bias, layer2.bias - 0.05 * T.grad(err, layer2.bias)), (layer3.weights, layer3.weights - (0.05 / datasize / datasize) * T.grad(err, layer3.weights)), (layer3.bias, layer3.bias - 0.05 * T.grad(err, layer3.bias)))) totalerr = 0 for i in range(iteration): totalerr = f(sample, expected) print(totalerr) return layer1, layer2, layer3
def test():
    # multiple inputs, multiple outputs
    a, b = T.dmatrices('a', 'b')
    diff = a - b
    abs_diff = T.abs_(diff)
    sqr_diff = diff ** 2
    f = function([a, b], [diff, abs_diff, sqr_diff])
    h, i, j = f([[0, 1], [2, 3]], [[4, 5], [6, 7]])

    # default value for function arguments
    a, b = T.dscalars('a', 'b')
    z = a + b
    f = function([a, Param(b, default=1)], z)
    print f(1, b=2)
    print f(1)
    print f(1, 2)

    # shared variable
    state = shared(0)
    inc = T.lscalar('inc')  # state is int64 by default
    accumulator = function([inc], state, updates=[(state, state + inc)])
    print accumulator(300)
    print state.get_value()
def compute_tanh():
    rng = np.random
    # define the symbolic inputs
    x, w = T.dmatrices('x', 'w')
    b = T.dvector('b')
    # define the scan loop that computes the output row by row
    y, u = theano.scan(lambda i, w, b: T.tanh(T.dot(i, w) + b), sequences=x, non_sequences=[w, b])
    # compile the full computation
    result = function([x, w, b], y)
    # initialise the data
    x_t = rng.rand(4, 5)
    w_t = rng.rand(5, 4)
    b_t = rng.rand(4)
    # feed in the data
    print x_t
    print w_t
    print result(x_t, w_t, b_t)
def test_duplicate_updates(self):
    x, y = dmatrices('x', 'y')
    z = shared(numpy.ones((2, 3)))
    self.assertRaises(ValueError, theano.function, [x, y], [z],
                      updates=[(z, (z + x + y)), (z, (z - x))])
# the agent can go forward or backward by one state with wrapping (so if you go back from the 1st state you go to the
# end).
states = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]

NUM_STATES = len(states)
NUM_ACTIONS = 2
FUTURE_REWARD_DISCOUNT = 0.5
LEARNING_RATE = 0.1


def hot_one_state(index):
    array = np.zeros(NUM_STATES)
    array[index] = 1.
    return array.reshape(array.shape[0], 1)  # Theano is sad if the shape looks like (10,) rather than (10,1)


state, targets = T.dmatrices('state', 'targets')
hidden_weights = theano.shared(value=np.zeros((NUM_ACTIONS, NUM_STATES)), name='hidden_weights')

output_fn = T.dot(hidden_weights, state)
output = theano.function([state], output_fn)

states_input = T.dmatrix('states_input')
loss_fn = T.mean((T.dot(hidden_weights, states_input) - targets) ** 2)
gradient = T.grad(cost=loss_fn, wrt=hidden_weights)

train_model = theano.function(
    inputs=[states_input, targets],
    outputs=loss_fn,
    updates=[[hidden_weights, hidden_weights - LEARNING_RATE * gradient]],
    allow_input_downcast=True)
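# Hypothetical training step (an assumption, not part of the original script):
# a single Q-learning update using the compiled `output` and `train_model`
# functions, assuming action 1 moves forward and action 0 moves backward.
import random

state_idx = random.randint(0, NUM_STATES - 1)
q_values = output(hot_one_state(state_idx))                 # shape (NUM_ACTIONS, 1)
action = int(q_values.argmax())
next_idx = (state_idx + 1) % NUM_STATES if action == 1 else (state_idx - 1) % NUM_STATES
reward = states[next_idx]
targets_arr = q_values.copy()
targets_arr[action, 0] = reward + FUTURE_REWARD_DISCOUNT * output(hot_one_state(next_idx)).max()
loss = train_model(hot_one_state(state_idx), targets_arr)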
from theano import tensor as T
import theano, time, numpy

rows, cols = 2, 2
param, constant = T.dmatrices('param', 'constant')
p1, p2, p3, p4 = T.dmatrices('p1', 'p2', 'p3', 'p4')
x, y, z, w, u = T.dmatrices('x', 'y', 'z', 'w', 'u')
ones = numpy.ones((rows, cols))
zeros = numpy.zeros((rows, cols))


# Second Group of rules - Decision Tree rules, apply to all nodes
def rule(p1, *therest):
    param = eval('p1 + 1/2')
    param = T.switch(eval('T.gt(param,1)'), ones, zeros)
    return param


#param = eval('p1+p2/p3')
list = [p1]
f_switch = theano.function(list, eval('rule(p1)'), mode=theano.Mode(linker='vm'))

m1 = numpy.random.rand(rows, cols)
m2 = numpy.random.rand(rows, cols)
m3 = numpy.random.rand(rows, cols)
vec = T.dvector()
scal = T.dscalar()
sv_add = vec + scal
f_add = function([vec, scal], sv_add)
print f_add([1, 2, 3, 4], 2)

x = T.dmatrix()
y = T.dmatrix()
z = x + y
f = function([x, y], z)
print f([[1, 1], [2, 2]], [[3, 3], [4, 4]])

x, y = T.dmatrices('x', 'y')
s = 1 / (1 + T.exp(-x))
logistic = function([x], s)
print logistic([[0, 1], [-1, -2]])

a, b = T.dmatrices('a', 'b')
diff = a - b
abs_diff = abs(diff)
diff_squared = diff**2
f = function([a, b], [diff, abs_diff, diff_squared])
print f([[1, 1], [1, 1]], [[0, 1], [2, 3]])

from theano import shared
def __init__(self, params,correct, samples = 20,batch_size=None): ker = kernel() self.samples = samples self.params = params self.batch_size=batch_size #データの保存ファイル model_file_name = 'model2' + '.save' #もしこれまでに作ったのがあるならロードする try: print ('Trying to load model...') with open(model_file_name, 'rb') as file_handle: obj = pickle.load(file_handle) self.f, self.g,self.ES_US= obj print ('Loaded!') return except: print ('Failed. Creating a new model...') X,Y,X_test,m,S_b,Z,eps_NQ,eps_M=\ T.dmatrices('X','Y','X_test','m','S_b','Z','eps_NQ','eps_M') mu,Sigma=T.dmatrices('mu','Sigma') lhyp = T.dvector('lhyp') ls=T.dvector('ls') N,Q= m.shape M=Z.shape[0] D=X.shape[1] #変数の正の値への制約条件 beta = T.exp(ls[0]) #beta=T.exp(lhyp[0]) sf2, l = T.exp(lhyp[0]), T.exp(lhyp[1:1+Q]) S=T.exp(S_b) Xtilda = m + S * eps_NQ print ('Setting up cache...') Kmm = ker.RBF(sf2, l, Z) KmmInv = sT.matrix_inverse(Kmm) #KmmDet=theano.sandbox.linalg.det(Kmm) from theano.tensor.shared_randomstreams import RandomStreams srng = RandomStreams(seed=234) rv_u = srng.normal((2,N,Q)) rv_s = srng.normal((2,N,Q)) #平均と分散で違う乱数を使う必要があるので別々に銘銘 xx_s=m.reshape([1,N,Q])+S.reshape([1,N,Q])*rv_s xxx_s=xx_s.reshape([2,N,1,Q]) zz=Z.reshape([1,1,M,Q]) rbf_u=T.exp(-T.sum(((xxx_s-zz)**2)/(2*l.reshape([1,1,1,Q])),-1))*sf2#N×M A=Kmm+beta*T.sum(T.mean(rbf_u.reshape([2,M,1,N])*rbf_u.reshape([2,1,M,N]),0),-1) Ainv=sT.matrix_inverse(A) Sigma_f=T.dot(Kmm,T.dot(Ainv,Kmm)) xx=m.reshape([1,N,Q])+S.reshape([1,N,Q])*rv_u xxx=xx.reshape([2,N,1,Q]) rbf=T.mean(T.exp(-T.sum(((xxx-zz)**2)/(2*l.reshape([1,1,1,Q])),-1)),0)#N×M RHS=T.sum(rbf.reshape([M,1,N])*X.reshape([1,D,N]),2) mu_f=beta*T.dot(Kmm,T.dot(Ainv,RHS)) self.ES_US = theano.function([m,S_b,Z,X,lhyp,ls], [mu_f,Sigma_f],on_unused_input='ignore') rv_u_d = srng.normal((N,Q)) rv_s_d = srng.normal((N,Q)) #平均と分散で違う乱数を使う必要があるので別々に銘銘 Xtilda_u = m + S * rv_u_d Xtilda_s = m + S * rv_s_d Kmn_u = ker.RBF(sf2, l, Z, Xtilda_u) Kmn_s = ker.RBF(sf2, l, Z, Xtilda_s) print ('Modeling...') Kmn = ker.RBF(sf2,l,Z,Xtilda) Knn = ker.RBF(sf2,l,Xtilda,Xtilda) Ktilda=Knn-T.dot(Kmn.T,T.dot(KmmInv,Kmn)) Kinterval=T.dot(KmmInv,Kmn) #スケール変換 Sigma_L=sT.cholesky(Sigma) U = mu+Sigma_L.dot(eps_M) mean_U=T.dot(Kinterval.T,U) Covariance = beta LL = (self.log_mvn(X, mean_U, Covariance) - 0.5*beta*T.sum((T.eye(N)*Ktilda)))*correct KL_X = -self.KLD_X(m,S)*correct KL_U = -self.KLD_U(mu, Sigma_L, Kmm,KmmInv) print ('Compiling model ...') inputs = {'X': X, 'Z': Z, 'm': m, 'S_b': S_b, 'mu': mu, 'Sigma': Sigma, 'lhyp': lhyp, 'ls': ls, 'eps_M': eps_M, 'eps_NQ': eps_NQ} z = 0.0*sum([T.sum(v) for v in inputs.values()]) # solve a bug with derivative wrt inputs not in the graph self.f = {n: theano.function(list(inputs.values()), f+z, name=n, on_unused_input='ignore')\ for n,f in zip(['X', 'U', 'LL', 'KL_U', 'KL_X'], [X, U, LL, KL_U, KL_X])} wrt = {'Z': Z, 'm': m, 'S_b': S_b, 'lhyp': lhyp, 'ls': ls} self.g = {vn: {gn: theano.function(list(inputs.values()), T.grad(gv+z, vv), name='d'+gn+'_d'+vn, on_unused_input='ignore') for gn,gv in zip(['LL', 'KL_U', 'KL_X'], [LL, KL_U, KL_X])} for vn, vv in wrt.items()} with open(model_file_name, 'wb') as file_handle: print ('Saving model...') sys.setrecursionlimit(2000) pickle.dump([self.f, self.g,self.ES_US], file_handle, protocol=pickle.HIGHEST_PROTOCOL)
def __init__(self, layer_sizes, epochs, batch_size, learn_rate, init_seed=None, verbose=False, cost='MSE', regularizer=None, l=1, report_every=1): """ A multi-layer perceptron implemented using Theano :param layer_sizes: sizes of the layers, including input and output :param epochs: number of epochs to run :param batch_size: number of samples in each minibatch. :param learn_rate: hyperparameter controlling learning velocity :param init_seed: used to seed numpy RNG :param verbose: 0 for nothing, 1 for cost updates :param cost: 'MSE'(mean squared error), 'cross entropy' :param regularizer: 'weight decay' or None :param l: coefficient of regularizer """ assert len(layer_sizes) > 1 self.init_seed = init_seed self.layer_sizes = layer_sizes self.epochs = epochs self.batch_size = batch_size self.learn_rate = learn_rate self.verbose = verbose self.cost = cost self.regularizer = regularizer self.l = l self.cost_value = None self.report_every = report_every n_layers = len(layer_sizes) trainxvar, trainyvar = T.dmatrices('xt', 'yt') x, y = T.dmatrices('x', 'y') # initializing the weights and biases randomly weights = [] biases = [] np.random.seed(init_seed) for i in range(n_layers-1): weights.append(shared(np.random.randn(layer_sizes[i], layer_sizes[i+1]), name='w{}'.format(i))) biases.append(shared(np.random.randn(layer_sizes[i+1]), name='b{}'.format(i))) # forward propagation a = [] for i in range(n_layers-1): if i == 0: a.append( 1 / (1 + T.exp(-(T.dot(x, weights[i]) + biases[i]))) ) else: a.append( 1 / (1 + T.exp(-(T.dot(a[i-1], weights[i]) + biases[i]))) ) self.a = a self.w = weights self._feed_forward = function([x], a[-1]) self._predict_best = function([x], a[-1].argmax(axis=1)) self._predict_activation = function([x], a[-1].round()) # creating cost function if cost == 'MSE': err = (y - a[-1]) ** 2 / 2 elif cost == 'cross entropy': err = -y * T.log(a[-1]) - (1 - y)*T.log(1 - a[-1]) else: raise ValueError("Unknown cost function, {}".format(cost)) # adding regularization function to it if regularizer is None: cost_f = err.mean() elif regularizer == 'weight decay': cost_f = err.mean() + l / (2 * x.size[0]) * T.sum([(_w ** 2).sum() for _w in weights]) else: raise ValueError("Unknown regularization method, {}".format(regularizer)) self._get_cost = function([x, y], cost_f) # creating training function dweights = T.grad(cost_f, weights) dbiases = T.grad(cost_f, biases) idx = T.lscalar() self._train = function(inputs=[idx, trainxvar, trainyvar], outputs=[cost_f], updates=[(_w, _w - learn_rate * _gw) for _w, _gw in zip(weights, dweights)] + [(_b, _b - learn_rate * _gb) for _b, _gb in zip(biases, dbiases)], givens=[ (x, trainxvar[batch_size*idx: batch_size*(idx+1)]), (y, trainyvar[batch_size*idx: batch_size*(idx+1)]) ])
from numpy import empty, inf, zeros, array, abs, count_nonzero
from matplotlib.pyplot import ion, draw, plot, savefig
from cv2 import imwrite, waitKey
from LogisticRegression import LogisticRegression as LogReg
from theano import function, pp, config as cfg
from time import sleep
# cfg.openmp = True
import theano.tensor as T
from dataset import loadData, OneToMany
from visual import visualize

Tr, Ts, _ = loadData('mnist.pkl.gz', True)
m_sample = Tr[0].shape[0]
m_test_sample = Ts[1].shape[0]

x, y = T.dmatrices('x', 'y')
L = LogReg(x, 784, 10)
lam = 0.04

p = L.predict()
l = L.cost(y) + L.regularizer(lam)
gw = T.grad(l, wrt=L.W)
gb = T.grad(l, wrt=L.B)

alpha = 0.05
W_shape = L.weightShapes()[0]
B_shape = L.weightShapes()[1]
VW = zeros(W_shape)
VB = zeros(B_shape)

train = function([x, y], [l, gw, gb])
import numpy as np
import theano.tensor as T
import theano
import os
from PIL import Image
from PIL import ImageDraw

y1, y2 = T.dmatrices('y1', 'y2')
loss = 0.0
scale_vector = []
scale_vector.extend([2] * 4)
scale_vector.extend([1] * 20)
scale_vector = np.reshape(np.asarray(scale_vector), (1, len(scale_vector)))

for i in range(2):
    y1_piece = y1[:, i * 25:i * 25 + 24]
    y2_piece = y2[:, i * 25:i * 25 + 24]

    y1_piece = y1_piece * scale_vector
    y2_piece = y2_piece * scale_vector

    loss_piece = T.sum(T.square(y1_piece - y2_piece), axis=1)
    loss = loss + loss_piece * y2[:, i * 25 + 24]

    closs = T.square(y2[:, i * 25 + 24] - y1[:, i * 25 + 24])
    cmask = (1 - y2[:, i * 25 + 24]) * 0.5 + y2[:, i * 25 + 24]
    closs = closs * cmask
    loss = loss + closs

loss = T.sum(loss)
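# Minimal usage sketch (an assumption, not part of the original file): compile
# the symbolic loss into a callable. Each row of y1/y2 holds two 25-value
# blocks: 24 scaled regression targets followed by one confidence term.
loss_fn = theano.function([y1, y2], loss)
example_pred = np.random.rand(3, 50)
example_true = np.random.rand(3, 50)
print(loss_fn(example_pred, example_true))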
import theano
import theano.tensor as T

a, b = T.dmatrices('a', 'b')
diff = a - b
abs_diff = abs(diff)
diff_squared = diff**2
f = theano.function([a, b], [diff, abs_diff, diff_squared])
print f([[1, 2], [-1, 2]], [[0, 1], [3, 4]])
import theano.tensor as T

a, b = T.dmatrices('a', 'b')
x, y = T.dmatrices('x', 'y')

is_train = 1  # 1 = training, 2 = test
z = T.switch(T.neq(is_train, 0), 1, 2)
print z.eval()
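# Sketch (an assumption, not part of the snippet above): in practice the
# train/test flag is usually a shared variable, so the same compiled graph can
# switch behaviour at run time without recompilation.
import theano
is_train_flag = theano.shared(1, name='is_train')
out = T.switch(T.neq(is_train_flag, 0), x * 0.5, x)  # e.g. rescale activations only in training mode
f = theano.function([x], out)
print f([[1., 2.], [3., 4.]])   # training-mode output
is_train_flag.set_value(0)
print f([[1., 2.], [3., 4.]])   # test-mode output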
import theano
import theano.tensor as tensor
from pprint import pprint

matrix = tensor.dmatrix('matrix')
logistic_expression = 1 / (1 + tensor.exp(-matrix))
logistic_function = theano.function([matrix], logistic_expression)
pprint(logistic_function([[0, 1], [-1, -2]]))

a, b = tensor.dmatrices('a', 'b')
diff = a - b
abs_diff = abs(diff)
diff_squared = diff**2
f = theano.function([a, b], [diff, abs_diff, diff_squared])
pprint(f([[1, 1], [1, 1]], [[0, 1], [2, 3]]))
from theano import function
import theano.tensor as T
import cv2 as cv
import copy, numpy as np
import math as mt
import logging


def log(percent, flag=1):
    logging.basicConfig(filename='percentage.log', level=logging.DEBUG)
    if flag == 1:
        logging.info(percent)
    if flag == 2:
        logging.debug(percent)


X = T.dmatrices('X')
B = T.dmatrices('B')
W = T.dmatrices('W')
Y = T.dmatrices('Y')
Y_act = T.dvector('Y_act')
Y_cal = T.dvector('Y_cal')
Error = T.scalar(dtype=X.dtype)
P = T.scalar(dtype=X.dtype)

output = (1 / (1 + T.exp(-X)))
sigmoid = function([X], output)

output1 = X  #T.tanh(X)
norm = function([X], output1)

# outputs class for input sample
import theano
import numpy as np
from theano import function
from theano import tensor as T
from theano.tensor.shared_randomstreams import RandomStreams as RS

X, Y = T.dmatrices(2)
B = T.dvector()
components, updates = theano.scan(lambda x, y, b: T.tanh(T.dot(x, y) + b), sequences=X, non_sequences=[Y, B])
ele_comp = function([X, Y, B], components)

dim = 10
X_realization = np.ones((dim, dim), dtype='float64')
Y_realization = np.ones((dim, dim), dtype='float64')
prng = RS(seed=9000)
B_real = prng.normal((dim, ), avg=0, std=2, dtype='float64')
B_realization = function([], B_real)
print ele_comp(X_realization, Y_realization, B_realization())

###################################################################################################################
###################################Evaluating a polynomial#########################################################
###################################################################################################################

co_eff = T.dvector()
free_var = T.dscalar()
max_coeff = T.iscalar()
components, updates = theano.scan(lambda ce, power, fv: ce * (fv**power),
                                  sequences=[co_eff,
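# The scan call above is cut off; a complete version of this standard
# polynomial-evaluation example (an assumption about how it continues) is:
components, updates = theano.scan(lambda ce, power, fv: ce * (fv ** power),
                                  sequences=[co_eff, T.arange(1000)],
                                  non_sequences=free_var)
polynomial = components.sum()
eval_poly = function([co_eff, free_var], polynomial)
print eval_poly(np.asarray([1., 0., 2.]), 3.)   # 1 + 0*x + 2*x**2 at x = 3 -> 19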
def compile_theano(): """ This function generates theano compiled kernels for energy and force learning ker_jkmn_withcutoff = ker_jkmn #* cutoff_ikmn The position of the atoms relative to the centrla one, and their chemical species are defined by a matrix of dimension Mx5 Returns: k3_ee (func): energy-energy kernel k3_ef (func): energy-force kernel k3_ff (func): force-force kernel """ if not (os.path.exists(Mffpath / 'k3_ee_s.pickle') and os.path.exists(Mffpath / 'k3_ef_s.pickle') and os.path.exists(Mffpath / 'k3_ff_s.pickle')): print("Building Kernels") import theano.tensor as T from theano import function, scan logger.info("Started compilation of theano three body kernels") # -------------------------------------------------- # INITIAL DEFINITIONS # -------------------------------------------------- # positions of central atoms r1, r2 = T.dvectors('r1d', 'r2d') # positions of neighbours rho1, rho2 = T.dmatrices('rho1', 'rho2') # hyperparameter sig = T.dscalar('sig') # cutoff hyperparameters theta = T.dscalar('theta') rc = T.dscalar('rc') # positions of neighbours without chemical species rho1s = rho1[:, 0:3] rho2s = rho2[:, 0:3] # -------------------------------------------------- # RELATIVE DISTANCES TO CENTRAL VECTOR AND BETWEEN NEIGHBOURS # -------------------------------------------------- # first and second configuration r1j = T.sqrt(T.sum((rho1s[:, :] - r1[None, :])**2, axis=1)) r2m = T.sqrt(T.sum((rho2s[:, :] - r2[None, :])**2, axis=1)) rjk = T.sqrt( T.sum((rho1s[None, :, :] - rho1s[:, None, :])**2, axis=2)) rmn = T.sqrt( T.sum((rho2s[None, :, :] - rho2s[:, None, :])**2, axis=2)) # -------------------------------------------------- # BUILD THE KERNEL # -------------------------------------------------- # Squared exp of differences se_1j2m = T.exp(-(r1j[:, None] - r2m[None, :])**2 / (2 * sig**2)) se_jkmn = T.exp( -(rjk[:, :, None, None] - rmn[None, None, :, :])**2 / (2 * sig**2)) se_jk2m = T.exp(-(rjk[:, :, None] - r2m[None, None, :])**2 / (2 * sig**2)) se_1jmn = T.exp(-(r1j[:, None, None] - rmn[None, :, :])**2 / (2 * sig**2)) # Kernel not summed (cyclic permutations) k1n = (se_1j2m[:, None, :, None] * se_1j2m[None, :, None, :] * se_jkmn) k2n = (se_1jmn[:, None, :, :] * se_jk2m[:, :, None, :] * se_1j2m[None, :, :, None]) k3n = (se_1j2m[:, None, None, :] * se_jk2m[:, :, :, None] * se_1jmn[None, :, :, :]) # final shape is M1 M1 M2 M2 ker = k1n + k2n + k3n cut_j = 0.5 * (1 + T.cos(np.pi * r1j / rc)) * ( (T.sgn(rc - r1j) + 1) / 2) cut_m = 0.5 * (1 + T.cos(np.pi * r2m / rc)) * ( (T.sgn(rc - r2m) + 1) / 2) cut_jk = cut_j[:, None] * cut_j[None, :] * 0.5 * ( 1 + T.cos(np.pi * rjk / rc)) * ((T.sgn(rc - rjk) + 1) / 2) cut_mn = cut_m[:, None] * cut_m[None, :] * 0.5 * ( 1 + T.cos(np.pi * rmn / rc)) * ((T.sgn(rc - rmn) + 1) / 2) # -------------------------------------------------- # REMOVE DIAGONAL ELEMENTS AND ADD CUTOFF # -------------------------------------------------- # remove diagonal elements AND lower triangular ones from first configuration mask_jk = T.triu(T.ones_like(rjk)) - T.identity_like(rjk) # remove diagonal elements from second configuration mask_mn = T.ones_like(rmn) - T.identity_like(rmn) # Combine masks mask_jkmn = mask_jk[:, :, None, None] * mask_mn[None, None, :, :] # Apply mask and then apply cutoff functions ker = ker * mask_jkmn ker = T.sum(ker * cut_jk[:, :, None, None] * cut_mn[None, None, :, :]) # -------------------------------------------------- # FINAL FUNCTIONS # -------------------------------------------------- # global energy energy kernel k_ee_fun = 
function([r1, r2, rho1, rho2, sig, theta, rc], ker, on_unused_input='ignore') # global energy force kernel k_ef = T.grad(ker, r2) k_ef_fun = function([r1, r2, rho1, rho2, sig, theta, rc], k_ef, on_unused_input='ignore') # local force force kernel k_ff = T.grad(ker, r1) k_ff_der, updates = scan(lambda j, k_ff, r2: T.grad(k_ff[j], r2), sequences=T.arange(k_ff.shape[0]), non_sequences=[k_ff, r2]) k_ff_fun = function([r1, r2, rho1, rho2, sig, theta, rc], k_ff_der, on_unused_input='ignore') # Save the function that we want to use for multiprocessing # This is necessary because theano is a crybaby and does not want to access the # Automaticallly stored compiled object from different processes with open(Mffpath / 'k3_ee_s.pickle', 'wb') as f: pickle.dump(k_ee_fun, f) with open(Mffpath / 'k3_ef_s.pickle', 'wb') as f: pickle.dump(k_ef_fun, f) with open(Mffpath / 'k3_ff_s.pickle', 'wb') as f: pickle.dump(k_ff_fun, f) else: print("Loading Kernels") with open(Mffpath / "k3_ee_s.pickle", 'rb') as f: k_ee_fun = pickle.load(f) with open(Mffpath / "k3_ef_s.pickle", 'rb') as f: k_ef_fun = pickle.load(f) with open(Mffpath / "k3_ff_s.pickle", 'rb') as f: k_ff_fun = pickle.load(f) # WRAPPERS (we don't want to plug the position of the central element every time) def k3_ee(conf1, conf2, sig, theta, rc): """ Three body kernel for global energy-energy correlation Args: conf1 (array): first configuration. conf2 (array): second configuration. sig (float): lengthscale hyperparameter theta[0] theta (float): cutoff decay rate hyperparameter theta[1] rc (float): cutoff distance hyperparameter theta[2] Returns: kernel (float): scalar valued energy-energy 3-body kernel """ return k_ee_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta, rc) def k3_ef(conf1, conf2, sig, theta, rc): """ Three body kernel for global energy-force correlation Args: conf1 (array): first configuration. conf2 (array): second configuration. sig (float): lengthscale hyperparameter theta[0] theta (float): cutoff decay rate hyperparameter theta[1] rc (float): cutoff distance hyperparameter theta[2] Returns: kernel (array): 3x1 energy-force 3-body kernel """ return -k_ef_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta, rc) def k3_ff(conf1, conf2, sig, theta, rc): """ Three body kernel for local force-force correlation Args: conf1 (array): first configuration. conf2 (array): second configuration. sig (float): lengthscale hyperparameter theta[0] theta (float): cutoff decay rate hyperparameter theta[1] rc (float): cutoff distance hyperparameter theta[2] Returns: kernel (matrix): 3x3 force-force 3-body kernel """ return k_ff_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta, rc) logger.info("Ended compilation of theano three body kernels") return k3_ee, k3_ef, k3_ff
def test_duplicate_updates(self):
    x, y = dmatrices("x", "y")
    z = shared(np.ones((2, 3)))
    with pytest.raises(ValueError):
        theano.function([x, y], [z], updates=[(z, (z + x + y)), (z, (z - x))])
def __init__(self, params, correct, samples=20, batch_size=None): ker = kernel() self.samples = samples self.params = params self.batch_size = batch_size #データの保存ファイル model_file_name = 'model2' + '.save' #もしこれまでに作ったのがあるならロードする try: print('Trying to load model...') with open(model_file_name, 'rb') as file_handle: obj = pickle.load(file_handle) self.f, self.g = obj print('Loaded!') return except: print('Failed. Creating a new model...') X,Y,X_test,m,S_b,mu,Sigma_b,Z,eps_NQ,eps_M =\ T.dmatrices('X','Y','X_test','m','S_b','mu','Sigma_b','Z','eps_NQ','eps_M') lhyp = T.dvector('lhyp') ls = T.dvector('ls') (M, D), N, Q = Z.shape, X.shape[0], X.shape[1] #変数の正の値への制約条件 beta = T.exp(ls[0]) #beta=T.exp(lhyp[0]) sf2, l = T.exp(lhyp[0]), T.exp(lhyp[1:1 + Q]) S = T.exp(S_b) #Sigma=T.exp(self.Sigma_b) #xについてはルートを取らなくても対角行列なので問題なし #uについては対角でないのでコレスキー分解するとかして三角行列を作る必要がある Sigma = T.tril(Sigma_b - T.diag(T.diag(Sigma_b)) + T.diag(T.exp(T.diag(Sigma_b)))) #スケール変換 mu_scaled, Sigma_scaled = sf2**0.5 * mu, sf2**0.5 * Sigma Xtilda = m + S * eps_NQ U = mu_scaled + Sigma_scaled.dot(eps_M) print('Setting up cache...') Kmm = ker.RBF(sf2, l, Z) KmmInv = sT.matrix_inverse(Kmm) #KmmDet=theano.sandbox.linalg.det(Kmm) #KmmInv_cache = sT.matrix_inverse(Kmm) #self.fKmm = theano.function([Z, lhyp], Kmm, name='Kmm') #self.f_KmmInv = theano.function([Z, lhyp], KmmInv_cache, name='KmmInv_cache') #復習:これは員数をZ,lhypとした関数kmmInv_cacheをコンパイルしている。つまり逆行列はzとハイパーパラメタの関数になった #self.update_KmmInv_cache()#実際に数値を入れてkinnvを計算させている #逆行列の微分関数を作っている #self.dKmm_d = {'Z': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), Z), name='dKmm_dZ'), # 'lhyp': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), lhyp), name='dKmm_dlhyp')} print('Modeling...') Kmn = ker.RBF(sf2, l, Z, Xtilda) Knn = ker.RBF(sf2, l, Xtilda, Xtilda) Ktilda = Knn - T.dot(Kmn.T, T.dot(KmmInv, Kmn)) Kinterval = T.dot(KmmInv, Kmn) mean_U = T.dot(Kinterval.T, U) Covariance = beta LL = (self.log_mvn(X, mean_U, Covariance) - 0.5 * beta * T.sum( (T.eye(N) * Ktilda))) * correct KL_X = -self.KLD_X(m, S) * correct KL_U = -self.KLD_U(mu_scaled, Sigma_scaled, Kmm, KmmInv) print('Compiling model ...') inputs = { 'X': X, 'Z': Z, 'm': m, 'S_b': S_b, 'mu': mu, 'Sigma_b': Sigma_b, 'lhyp': lhyp, 'ls': ls, 'eps_M': eps_M, 'eps_NQ': eps_NQ } z = 0.0 * sum([ T.sum(v) for v in inputs.values() ]) # solve a bug with derivative wrt inputs not in the graph self.f = {n: theano.function(list(inputs.values()), f+z, name=n, on_unused_input='ignore')\ for n,f in zip(['X', 'U', 'LL', 'KL_U', 'KL_X'], [X, U, LL, KL_U, KL_X])} wrt = { 'Z': Z, 'm': m, 'S_b': S_b, 'mu': mu, 'Sigma_b': Sigma_b, 'lhyp': lhyp, 'ls': ls } self.g = { vn: { gn: theano.function(list(inputs.values()), T.grad(gv + z, vv), name='d' + gn + '_d' + vn, on_unused_input='ignore') for gn, gv in zip(['LL', 'KL_U', 'KL_X'], [LL, KL_U, KL_X]) } for vn, vv in wrt.items() } with open(model_file_name, 'wb') as file_handle: print('Saving model...') sys.setrecursionlimit(2000) pickle.dump([self.f, self.g], file_handle, protocol=pickle.HIGHEST_PROTOCOL)
f = theano.function([a, b], out)
print(f([0, 1, 2], [1, 2, 3]))

## Logistic
x = T.dmatrix("x")
s = 1 / (1 + T.exp(-x))
logistic = theano.function([x], s)
logistic([[0, 1], [-1, -2]])  # elementwise because constituent operations are elementwise

## Equivalently, can use tanh function
s2 = (1 + T.tanh(x / 2)) / 2
logistic2 = function([x], s2)
logistic2([[0, 1], [-1, -2]])  # elementwise because constituent operations are elementwise

## Function can also have multiple outputs
a, b = T.dmatrices('a', 'b')
c, d = T.dmatrices('c', 'd')
diff = a - b
abs_diff = abs(diff)
diff_squared = diff ** 2
f = theano.function([a, b], [diff, abs_diff, diff_squared])
f([[1, 1], [1, 1]], [[0, 1], [2, 3]])

## Default value for argument
x, y = T.dscalars('x', 'y')
z = x + y
f = theano.function([x, theano.Param(y, default=1)], z)
f(33)
f(33, 2)

from theano import function
Created on Tue Jul 4 13:55:41 2017

@author: zhangli
"""

import numpy as np
import theano.tensor as T
import theano

# activation function example
x = T.dmatrix('x')
s = 1 / (1 + T.exp(-x))  # logistic (soft step), one kind of activation function
logistic = theano.function([x], s)
print(logistic([[0, 1], [2, 3]]))

# multiple outputs for a function
a, b = T.dmatrices('a', 'b')  # dmatrices defines two variables of the same type
diff = a - b
abs_diff = abs(diff)
diff_squared = diff**2
f = theano.function([a, b], [diff, abs_diff, diff_squared])
print(f(np.ones((2, 2)), np.arange(4).reshape((2, 2))))

# default values and names for a function's arguments
x, y, w = T.dscalars('x', 'y', 'w')
z = (x + y) * w
f = theano.function(
    [x, theano.In(y, value=1), theano.In(w, value=2, name='weights')], z)
print(f(23, 2, weights=4))
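# --- Illustrative note (not part of the original file) ----------------------
# With the theano.In defaults above (y=1, w=2), arguments can also be omitted:
print(f(23))        # (23 + 1) * 2 = 48.0
print(f(23, 5))     # override y only -> (23 + 5) * 2 = 56.0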
S = 1 / (1 + T.exp(-X))

# Define and test function logistics
logistics = function([X], S)
print "===== Function logistics tests ====="
input = [[1, 2], [3, 4]]
print "input: "
print input
print "Function logistics output: "
print logistics(input)

# Computing More than one Thing at the Same Time
print ""
print "===== Define matrices a,b and function diffs with three outputs ====="
a, b = T.dmatrices('a', 'b')
diff = a - b
absdiff = abs(diff)
squareddiff = diff ** 2

# Define and test function diffs
diffs = function([a, b], [diff, absdiff, squareddiff])
print "===== Function diffs tests ====="
input1 = [[1, 1], [1, 1]]
print "input1: "
print input1
input2 = [[0, 1], [2, 3]]
print "input2: "
print input2
print "Function diffs output: "
print diffs(input1, input2)
def compile_theano():
    """
    This function generates theano compiled kernels for energy and force learning
    ker_jkmn_withcutoff = ker_jkmn #* cutoff_ikmn

    The positions of the atoms relative to the central one, and their chemical
    species, are defined by a matrix of dimension Mx5

    Returns:
        km_ee (func): energy-energy kernel
        km_ef (func): energy-force kernel
        km_ff (func): force-force kernel
    """

    if not (os.path.exists(Mffpath / 'k3_ee_m.pickle') and
            os.path.exists(Mffpath / 'k3_ef_m.pickle') and
            os.path.exists(Mffpath / 'k3_ff_m.pickle')):
        print("Building Kernels")

        import theano.tensor as T
        from theano import function, scan
        logger.info("Started compilation of theano three body kernels")

        # --------------------------------------------------
        # INITIAL DEFINITIONS
        # --------------------------------------------------

        # positions of central atoms
        r1, r2 = T.dvectors('r1d', 'r2d')

        # positions of neighbours
        rho1, rho2 = T.dmatrices('rho1', 'rho2')

        # hyperparameter
        sig = T.dscalar('sig')

        # cutoff hyperparameters
        theta = T.dscalar('theta')
        rc = T.dscalar('rc')

        # positions of neighbours without chemical species
        rho1s = rho1[:, 0:3]
        rho2s = rho2[:, 0:3]

        alpha_1 = rho1[:, 3].flatten()
        alpha_2 = rho2[:, 3].flatten()

        alpha_j = rho1[:, 4].flatten()
        alpha_m = rho2[:, 4].flatten()

        alpha_k = rho1[:, 4].flatten()
        alpha_n = rho2[:, 4].flatten()

        # --------------------------------------------------
        # RELATIVE DISTANCES TO CENTRAL VECTOR AND BETWEEN NEIGHBOURS
        # --------------------------------------------------

        # first and second configuration
        r1j = T.sqrt(T.sum((rho1s[:, :] - r1[None, :])**2, axis=1))
        r2m = T.sqrt(T.sum((rho2s[:, :] - r2[None, :])**2, axis=1))
        rjk = T.sqrt(T.sum((rho1s[None, :, :] - rho1s[:, None, :])**2, axis=2))
        rmn = T.sqrt(T.sum((rho2s[None, :, :] - rho2s[:, None, :])**2, axis=2))

        # --------------------------------------------------
        # CHEMICAL SPECIES MASK
        # --------------------------------------------------

        # numerical kronecker
        def delta_alpha2(a1j, a2m):
            d = np.exp(-(a1j - a2m)**2 / (2 * 0.00001**2))
            return d

        # permutation 1
        delta_alphas12 = delta_alpha2(alpha_1[0], alpha_2[0])
        delta_alphasjm = delta_alpha2(alpha_j[:, None], alpha_m[None, :])
        delta_alphas_jmkn = delta_alphasjm[:, None, :, None] * delta_alphasjm[None, :, None, :]
        delta_perm1 = delta_alphas12 * delta_alphas_jmkn

        # permutation 3
        delta_alphas1m = delta_alpha2(alpha_1[0, None], alpha_m[None, :]).flatten()
        delta_alphasjn = delta_alpha2(alpha_j[:, None], alpha_n[None, :])
        delta_alphask2 = delta_alpha2(alpha_k[:, None], alpha_2[None, 0]).flatten()
        delta_perm3 = delta_alphas1m[None, None, :, None] * delta_alphasjn[:, None, None, :] * \
            delta_alphask2[None, :, None, None]

        # permutation 5
        delta_alphas1n = delta_alpha2(alpha_1[0, None], alpha_n[None, :]).flatten()
        delta_alphasj2 = delta_alpha2(alpha_j[:, None], alpha_2[None, 0]).flatten()
        delta_alphaskm = delta_alpha2(alpha_k[:, None], alpha_m[None, :])
        delta_perm5 = delta_alphas1n[None, None, None, :] * delta_alphaskm[None, :, :, None] * \
            delta_alphasj2[:, None, None, None]

        # --------------------------------------------------
        # BUILD THE KERNEL
        # --------------------------------------------------

        # Squared exp of differences
        se_1j2m = T.exp(-(r1j[:, None] - r2m[None, :])**2 / (2 * sig**2))
        se_jkmn = T.exp(-(rjk[:, :, None, None] - rmn[None, None, :, :])**2 / (2 * sig**2))
        se_jk2m = T.exp(-(rjk[:, :, None] - r2m[None, None, :])**2 / (2 * sig**2))
        se_1jmn = T.exp(-(r1j[:, None, None] - rmn[None, :, :])**2 / (2 * sig**2))

        # Kernel not summed (cyclic permutations)
        k1n = (se_1j2m[:, None, :, None] * se_1j2m[None, :, None, :] * se_jkmn)
        k2n = (se_1jmn[:, None, :, :] * se_jk2m[:, :, None, :] * se_1j2m[None, :, :, None])
        k3n = (se_1j2m[:, None, None, :] * se_jk2m[:, :, :, None] * se_1jmn[None, :, :, :])

        # final shape is M1 M1 M2 M2
        ker_loc = k1n * delta_perm1 + k2n * delta_perm3 + k3n * delta_perm5

        # Faster version of cutoff (less calculations)
        cut_j = 0.5 * (1 + T.cos(np.pi * r1j / rc))
        cut_m = 0.5 * (1 + T.cos(np.pi * r2m / rc))
        cut_jk = cut_j[:, None] * cut_j[None, :] * 0.5 * (1 + T.cos(np.pi * rjk / rc))
        cut_mn = cut_m[:, None] * cut_m[None, :] * 0.5 * (1 + T.cos(np.pi * rmn / rc))

        # --------------------------------------------------
        # REMOVE DIAGONAL ELEMENTS
        # --------------------------------------------------

        # remove diagonal elements AND lower triangular ones from first configuration
        mask_jk = T.triu(T.ones_like(rjk)) - T.identity_like(rjk)

        # remove diagonal elements from second configuration
        mask_mn = T.ones_like(rmn) - T.identity_like(rmn)

        # Combine masks
        mask_jkmn = mask_jk[:, :, None, None] * mask_mn[None, None, :, :]

        # Apply mask and then apply cutoff functions
        ker_loc = ker_loc * mask_jkmn
        ker_loc = T.sum(ker_loc * cut_jk[:, :, None, None] * cut_mn[None, None, :, :])

        ker_loc = T.exp(ker_loc / 20)

        # --------------------------------------------------
        # FINAL FUNCTIONS
        # --------------------------------------------------

        # energy energy kernel
        k_ee_fun = function([r1, r2, rho1, rho2, sig, theta, rc], ker_loc,
                            on_unused_input='ignore')

        # energy force kernel
        k_ef_cut = T.grad(ker_loc, r2)
        k_ef_fun = function([r1, r2, rho1, rho2, sig, theta, rc], k_ef_cut,
                            on_unused_input='ignore')

        # force force kernel
        k_ff_cut = T.grad(ker_loc, r1)
        k_ff_cut_der, updates = scan(lambda j, k_ff_cut, r2: T.grad(k_ff_cut[j], r2),
                                     sequences=T.arange(k_ff_cut.shape[0]),
                                     non_sequences=[k_ff_cut, r2])
        k_ff_fun = function([r1, r2, rho1, rho2, sig, theta, rc], k_ff_cut_der,
                            on_unused_input='ignore')

        # Save the functions that we want to use for multiprocessing.
        # This is necessary because theano cannot access the automatically
        # stored compiled object from different processes.
        with open(Mffpath / 'k3_ee_m.pickle', 'wb') as f:
            pickle.dump(k_ee_fun, f)
        with open(Mffpath / 'k3_ef_m.pickle', 'wb') as f:
            pickle.dump(k_ef_fun, f)
        with open(Mffpath / 'k3_ff_m.pickle', 'wb') as f:
            pickle.dump(k_ff_fun, f)

    else:
        print("Loading Kernels")
        with open(Mffpath / "k3_ee_m.pickle", 'rb') as f:
            k_ee_fun = pickle.load(f)
        with open(Mffpath / "k3_ef_m.pickle", 'rb') as f:
            k_ef_fun = pickle.load(f)
        with open(Mffpath / "k3_ff_m.pickle", 'rb') as f:
            k_ff_fun = pickle.load(f)

    # WRAPPERS (we don't want to plug the position of the central element every time)

    def km_ee(conf1, conf2, sig, theta, rc):
        """
        Many body kernel for energy-energy correlation

        Args:
            conf1 (array): first configuration.
            conf2 (array): second configuration.
            sig (float): lengthscale hyperparameter theta[0]
            theta (float): cutoff decay rate hyperparameter theta[1]
            rc (float): cutoff distance hyperparameter theta[2]

        Returns:
            kernel (float): scalar valued energy-energy many-body kernel
        """
        return k_ee_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta, rc)

    def km_ef(conf1, conf2, sig, theta, rc):
        """
        Many body kernel for energy-force correlation

        Args:
            conf1 (array): first configuration.
            conf2 (array): second configuration.
            sig (float): lengthscale hyperparameter theta[0]
            theta (float): cutoff decay rate hyperparameter theta[1]
            rc (float): cutoff distance hyperparameter theta[2]

        Returns:
            kernel (array): 3x1 energy-force many-body kernel
        """
        return -k_ef_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta, rc)

    def km_ff(conf1, conf2, sig, theta, rc):
        """
        Many body kernel for force-force correlation

        Args:
            conf1 (array): first configuration.
            conf2 (array): second configuration.
            sig (float): lengthscale hyperparameter theta[0]
            theta (float): cutoff decay rate hyperparameter theta[1]
            rc (float): cutoff distance hyperparameter theta[2]

        Returns:
            kernel (matrix): 3x3 force-force many-body kernel
        """
        return k_ff_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta, rc)

    logger.info("Ended compilation of theano many body kernels")

    return km_ee, km_ef, km_ff
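# --- Illustrative usage sketch (not part of the original file) --------------
# How the three wrappers returned by compile_theano() might be called, assuming
# the module-level names it uses (Mffpath, logger, os, pickle, np) are in
# place.  Each configuration is an M x 5 array as described in the docstring:
# columns 0-2 hold neighbour positions relative to the central atom, column 3
# the central atom's species, column 4 the neighbour species.  The
# hyperparameter values below are arbitrary assumptions; theta is accepted but
# not used by these kernels (hence on_unused_input='ignore' above).
import numpy as np

km_ee, km_ef, km_ff = compile_theano()

rng = np.random.default_rng(0)

def random_conf(n_neigh, central_species=1, neigh_species=1):
    conf = np.zeros((n_neigh, 5))
    conf[:, 0:3] = rng.uniform(-2.0, 2.0, size=(n_neigh, 3))  # relative positions
    conf[:, 3] = central_species
    conf[:, 4] = neigh_species
    return conf

conf1, conf2 = random_conf(6), random_conf(8)
sig, theta, rc = 0.5, 0.5, 3.5   # assumed hyperparameter values

print(km_ee(conf1, conf2, sig, theta, rc))   # scalar energy-energy kernel
print(km_ef(conf1, conf2, sig, theta, rc))   # length-3 energy-force kernel
print(km_ff(conf1, conf2, sig, theta, rc))   # 3x3 force-force kernel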
def __init__(self, params, correct, Xinfo, samples=500, batch_size=None):
    ker = kernel()
    mmd = MMD()
    self.samples = samples
    self.params = params
    self.batch_size = batch_size
    self.Xlabel_value = Xinfo["Xlabel_value"]
    self.Weight_value = Xinfo["Weight_value"]

    # File in which the model is saved
    model_file_name = 'model_MMD_kernel' + '.save'
    # If a previously built model exists, load it
    try:
        print('Trying to load model...')
        with open(model_file_name, 'rb') as file_handle:
            obj = pickle.load(file_handle)
            self.f, self.g = obj
            print('Loaded!')
            return
    except:
        print('Failed. Creating a new model...')

    X, Y, X_test, m, S_b, mu, Sigma_b, Z, eps_NQ, eps_M =\
        T.dmatrices('X', 'Y', 'X_test', 'm', 'S_b', 'mu', 'Sigma_b', 'Z', 'eps_NQ', 'eps_M')

    Xlabel = T.dmatrix('Xlabel')
    Zlabel = T.dmatrix('Zlabel')

    # The labels are probabilities, so they must be positive and normalised
    Zlabel_T = T.exp(Zlabel) / T.sum(T.exp(Zlabel), 1)[:, None]

    Weight = T.dmatrix('Weight')

    lhyp = T.dvector('lhyp')
    ls = T.dvector('ls')
    ga = T.dvector('ga')

    (M, D), N, Q = Z.shape, X.shape[0], X.shape[1]

    # Constrain these variables to be positive
    beta = T.exp(ls)
    gamma = T.exp(ga[0])
    # beta = T.exp(lhyp[0])
    sf2, l = T.exp(lhyp[0]), T.exp(lhyp[1:1 + Q])

    S = T.exp(S_b)
    # Sigma = T.exp(self.Sigma_b)

    # For X no square root is needed because its covariance is diagonal.
    # For U the covariance is not diagonal, so a triangular matrix has to be
    # built (a Cholesky-style parameterisation).
    Sigma = T.tril(Sigma_b - T.diag(T.diag(Sigma_b)) + T.diag(T.exp(T.diag(Sigma_b))))

    # Rescale
    mu_scaled, Sigma_scaled = sf2**0.5 * mu, sf2**0.5 * Sigma

    Xtilda = m + S * eps_NQ
    U = mu_scaled + Sigma_scaled.dot(eps_M)

    print('Setting up cache...')

    Kmm = ker.RBF(sf2, l, Z)
    Kmm = mmd.MMD_kenel_Xonly(gamma, Zlabel_T, Kmm, Weight)
    KmmInv = sT.matrix_inverse(Kmm)
    # KmmDet = theano.sandbox.linalg.det(Kmm)

    # KmmInv_cache = sT.matrix_inverse(Kmm)
    # self.fKmm = theano.function([Z, lhyp], Kmm, name='Kmm')
    # self.f_KmmInv = theano.function([Z, lhyp], KmmInv_cache, name='KmmInv_cache')
    # Note: this compiles KmmInv_cache as a function of Z and lhyp, i.e. the
    # inverse matrix becomes a function of Z and the hyperparameters.
    # self.update_KmmInv_cache()  # evaluate KmmInv at the current numerical values
    # Derivative functions of the inverse matrix:
    # self.dKmm_d = {'Z': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), Z), name='dKmm_dZ'),
    #                'lhyp': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), lhyp), name='dKmm_dlhyp')}

    print('Modeling...')

    Kmn = ker.RBF(sf2, l, Z, Xtilda)
    Kmn = mmd.MMD_kenel_ZX(gamma, Zlabel_T, Xlabel, Kmn, Weight)

    Knn = ker.RBF(sf2, l, Xtilda, Xtilda)
    Knn = mmd.MMD_kenel_Xonly(gamma, Xlabel, Knn, Weight)

    Ktilda = Knn - T.dot(Kmn.T, T.dot(KmmInv, Kmn))

    Kinterval = T.dot(KmmInv, Kmn)

    mean_U = T.dot(Kinterval.T, U)
    betaI = T.diag(T.dot(Xlabel, beta))
    Covariance = betaI

    LL = (self.log_mvn(X, mean_U, Covariance)
          - 0.5 * T.sum(T.dot(betaI, Ktilda))) * correct
    KL_X = -self.KLD_X(m, S) * correct
    KL_U = -self.KLD_U(mu_scaled, Sigma_scaled, Kmm, KmmInv)

    print('Compiling model ...')

    inputs = {'X': X, 'Z': Z, 'm': m, 'S_b': S_b, 'mu': mu, 'Sigma_b': Sigma_b,
              'lhyp': lhyp, 'ls': ls, 'eps_M': eps_M, 'eps_NQ': eps_NQ,
              'ga': ga, 'Zlabel': Zlabel, 'Weight': Weight, 'Xlabel': Xlabel}
    # Work around a bug with derivatives w.r.t. inputs not in the graph
    z = 0.0 * sum([T.sum(v) for v in inputs.values()])
    self.f = {n: theano.function(list(inputs.values()), f + z, name=n, on_unused_input='ignore')
              for n, f in zip(['X', 'U', 'LL', 'KL_U', 'KL_X'], [X, U, LL, KL_U, KL_X])}

    wrt = {'Z': Z, 'm': m, 'S_b': S_b, 'mu': mu, 'Sigma_b': Sigma_b,
           'lhyp': lhyp, 'ls': ls, 'ga': ga, 'Zlabel': Zlabel}
    self.g = {vn: {gn: theano.function(list(inputs.values()), T.grad(gv + z, vv),
                                       name='d' + gn + '_d' + vn, on_unused_input='ignore')
                   for gn, gv in zip(['LL', 'KL_U', 'KL_X'], [LL, KL_U, KL_X])}
              for vn, vv in wrt.items()}

    with open(model_file_name, 'wb') as file_handle:
        print('Saving model...')
        sys.setrecursionlimit(10000)
        pickle.dump([self.f, self.g], file_handle,
                    protocol=pickle.HIGHEST_PROTOCOL)
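# --- Illustrative note (not part of the original file) ----------------------
# The Zlabel_T transform above is a row-wise softmax, which is what makes each
# label row positive and normalised to one.  A small NumPy check:
import numpy as np

Zlabel_demo = np.array([[0.0, 1.0, 2.0],
                        [3.0, -1.0, 0.5]])
Zlabel_T_demo = np.exp(Zlabel_demo) / np.sum(np.exp(Zlabel_demo), 1)[:, None]
print(Zlabel_T_demo.sum(axis=1))   # -> [1. 1.]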
def __init__(self, params, correct, samples=500, batch_size=None):
    ker = kernel()
    self.samples = samples
    self.params = params
    self.batch_size = batch_size

    # File in which the model is saved
    model_file_name = 'model2' + '.save'
    # If a previously built model exists, load it
    try:
        print('Trying to load model...')
        with open(model_file_name, 'rb') as file_handle:
            obj = pickle.load(file_handle)
            self.f, self.g = obj
            print('Loaded!')
            return
    except:
        print('Failed. Creating a new model...')

    X, Y, X_test, mu, Sigma_b, Z, eps_NQ, eps_M =\
        T.dmatrices('X', 'Y', 'X_test', 'mu', 'Sigma_b', 'Z', 'eps_NQ', 'eps_M')

    Wx, Ws, Wu =\
        T.dmatrices('Wx', 'Ws', 'Wu')
    bx, bs, bu =\
        T.dvectors('bx', 'bs', 'bu')
    gamma_x, beta_x, gamma_u, beta_u, gamma_s, beta_s =\
        T.dvectors("gamma_x", "beta_x", "gamma_u", "beta_u", "gamma_s", "beta_s")

    lhyp = T.dvector('lhyp')
    ls = T.dvector('ls')

    (M, D), N, Q = Z.shape, X.shape[0], X.shape[1]

    # Constrain these variables to be positive
    beta = T.exp(ls[0])
    # beta = T.exp(lhyp[0])
    sf2, l = T.exp(lhyp[0]), T.exp(lhyp[1:1 + Q])

    # Sigma = T.exp(self.Sigma_b)

    # For X no square root is needed because its covariance is diagonal.
    # For U the covariance is not diagonal, so a triangular matrix has to be
    # built (a Cholesky-style parameterisation).
    Sigma = T.tril(Sigma_b - T.diag(T.diag(Sigma_b)) + T.diag(T.exp(T.diag(Sigma_b))))

    # Rescale
    mu_scaled, Sigma_scaled = sf2**0.5 * mu, sf2**0.5 * Sigma

    # Build the hidden layers
    out1 = self.neural_net_predict(Wx, bx, gamma_x, beta_x, X)
    m = self.neural_net_predict(Wu, bu, gamma_u, beta_u, out1)
    S = self.neural_net_predict(Ws, bs, gamma_s, beta_s, out1)
    # outputs1 = T.dot(X, Wx) + bx
    # m = T.dot(out1, Wu) + bu
    # S = T.dot(out1, Ws) + bs

    S = T.exp(S)
    S = T.sqrt(S)

    Xtilda = m + S * eps_NQ
    U = mu_scaled + Sigma_scaled.dot(eps_M)

    print('Setting up cache...')

    Kmm = ker.RBF(sf2, l, Z)
    KmmInv = sT.matrix_inverse(Kmm)
    # KmmDet = theano.sandbox.linalg.det(Kmm)

    # KmmInv_cache = sT.matrix_inverse(Kmm)
    # self.fKmm = theano.function([Z, lhyp], Kmm, name='Kmm')
    # self.f_KmmInv = theano.function([Z, lhyp], KmmInv_cache, name='KmmInv_cache')
    # Note: this compiles KmmInv_cache as a function of Z and lhyp, i.e. the
    # inverse matrix becomes a function of Z and the hyperparameters.
    # self.update_KmmInv_cache()  # evaluate KmmInv at the current numerical values
    # Derivative functions of the inverse matrix:
    # self.dKmm_d = {'Z': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), Z), name='dKmm_dZ'),
    #                'lhyp': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), lhyp), name='dKmm_dlhyp')}

    print('Modeling...')

    Kmn = ker.RBF(sf2, l, Z, Xtilda)
    Knn = ker.RBF(sf2, l, Xtilda, Xtilda)
    Ktilda = Knn - T.dot(Kmn.T, T.dot(KmmInv, Kmn))

    Kinterval = T.dot(KmmInv, Kmn)

    mean_U = T.dot(Kinterval.T, U)
    Covariance = beta

    LL = (self.log_mvn(X, mean_U, Covariance)
          - 0.5 * beta * T.sum((T.eye(N) * Ktilda))) * correct
    KL_X = -self.KLD_X(m, S) * correct
    KL_U = -self.KLD_U(mu_scaled, Sigma_scaled, Kmm, KmmInv)

    print('Compiling model ...')

    inputs = {'X': X, 'Z': Z, 'mu': mu, 'Sigma_b': Sigma_b, 'lhyp': lhyp, 'ls': ls,
              'eps_M': eps_M, 'eps_NQ': eps_NQ,
              'Wx': Wx, 'bx': bx, 'Wu': Wu, 'bu': bu, 'Ws': Ws, 'bs': bs,
              'gamma_x': gamma_x, 'beta_x': beta_x, 'gamma_u': gamma_u, 'beta_u': beta_u,
              'gamma_s': gamma_s, 'beta_s': beta_s}
    # Work around a bug with derivatives w.r.t. inputs not in the graph
    z = 0.0 * sum([T.sum(v) for v in inputs.values()])
    self.f = {n: theano.function(list(inputs.values()), f + z, name=n, on_unused_input='ignore')
              for n, f in zip(['Xtilda', 'U', 'LL', 'KL_U', 'KL_X'], [Xtilda, U, LL, KL_U, KL_X])}

    wrt = {'Z': Z, 'mu': mu, 'Sigma_b': Sigma_b, 'lhyp': lhyp, 'ls': ls,
           'Wx': Wx, 'bx': bx, 'Wu': Wu, 'bu': bu, 'Ws': Ws, 'bs': bs,
           'gamma_x': gamma_x, 'beta_x': beta_x, 'gamma_u': gamma_u, 'beta_u': beta_u,
           'gamma_s': gamma_s, 'beta_s': beta_s}
    self.g = {vn: {gn: theano.function(list(inputs.values()), T.grad(gv + z, vv),
                                       name='d' + gn + '_d' + vn, on_unused_input='ignore')
                   for gn, gv in zip(['LL', 'KL_U', 'KL_X'], [LL, KL_U, KL_X])}
              for vn, vv in wrt.items()}

    with open(model_file_name, 'wb') as file_handle:
        print('Saving model...')
        sys.setrecursionlimit(2000)
        pickle.dump([self.f, self.g], file_handle, protocol=pickle.HIGHEST_PROTOCOL)
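# --- Hypothetical sketch (not part of the original file) --------------------
# The encoder above calls self.neural_net_predict(W, b, gamma, beta, inputs),
# which is not shown in this snippet.  The body below is only an assumption
# about what such a helper could look like, given its signature: a dense layer
# followed by batch-normalisation-style scaling with gamma/beta and a ReLU.
# It is a sketch, not the author's implementation.
import theano.tensor as T

def neural_net_predict(self, W, b, gamma, beta, inputs):
    h = T.dot(inputs, W) + b                                       # dense affine layer
    h_norm = (h - T.mean(h, axis=0)) / (T.std(h, axis=0) + 1e-6)   # standardise per unit
    return T.nnet.relu(gamma * h_norm + beta)                      # scale, shift, nonlinearity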
def __init__(self, params, sx2 = 1, linear_model = False, samples = 20, use_hat = False): ker, self.samples, self.params, self.KmmInv = kernel(), samples, params, {} self.use_hat = use_hat model_file_name = 'model' + ('_hat' if use_hat else '') + ('_linear' if linear_model else '') + '.save' try: print 'Trying to load model...' with open(model_file_name, 'rb') as file_handle: obj = cPickle.load(file_handle) self.f, self.g, self.f_Kmm, self.f_KmmInv, self.dKmm_d = obj self.update_KmmInv_cache() print 'Loaded!' return except: print 'Failed. Creating a new model...' Y, Z, m, ls, mu, lL, eps_MK, eps_NQ, eps_NK, KmmInv = T.dmatrices('Y', 'Z', 'm', 'ls', 'mu', 'lL', 'eps_MK', 'eps_NQ', 'eps_NK', 'KmmInv') lhyp = T.dvector('lhyp') (M, K), N, Q = mu.shape, m.shape[0], Z.shape[1] s, sl2, sf2, l = T.exp(ls), T.exp(lhyp[0]), T.exp(lhyp[1]), T.exp(lhyp[2:2+Q]) L = T.tril(lL - T.diag(T.diag(lL)) + T.diag(T.exp(T.diag(lL)))) print 'Setting up cache...' Kmm = ker.RBF(sf2, l, Z) if not linear_model else ker.LIN(sl2, Z) KmmInv_cache = sT.matrix_inverse(Kmm) self.f_Kmm = theano.function([Z, lhyp], Kmm, name='Kmm') self.f_KmmInv = theano.function([Z, lhyp], KmmInv_cache, name='KmmInv_cache') self.update_KmmInv_cache() self.dKmm_d = {'Z': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), Z), name='dKmm_dZ'), 'lhyp': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), lhyp), name='dKmm_dlhyp')} print 'Setting up model...' if not self.use_hat: mu_scaled, L_scaled = sf2**0.5 * mu, sf2**0.5 * L X = m + s * eps_NQ U = mu_scaled + L_scaled.dot(eps_MK) Kmn = ker.RBF(sf2, l, Z, X) if not linear_model else ker.LIN(sl2, Z, X) Knn = ker.RBFnn(sf2, l, X) if not linear_model else ker.LINnn(sl2, X) A = KmmInv.dot(Kmn) B = Knn - T.sum(Kmn * KmmInv.dot(Kmn), 0) F = A.T.dot(U) + T.maximum(B, 1e-16)[:,None]**0.5 * eps_NK F = T.concatenate((T.zeros((N,1)), F), axis=1) S = T.nnet.softmax(F) LS = T.sum(T.log(T.maximum(T.sum(Y * S, 1), 1e-16))) if not linear_model: KL_U = -0.5 * (T.sum(KmmInv.T * T.sum(mu_scaled[:,None,:]*mu_scaled[None,:,:], 2)) + K * (T.sum(KmmInv.T * L_scaled.dot(L_scaled.T)) - M - 2.0*T.sum(T.log(T.diag(L_scaled))) + 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm)))))) else: KL_U = 0 #KL_U = -0.5 * T.sum(T.sum(mu_scaled * KmmInv.dot(mu_scaled), 0) + T.sum(KmmInv * L_scaled.dot(L_scaled.T)) - M # - 2.0*T.sum(T.log(T.diag(L_scaled))) + 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm))))) if not linear_model else 0 else: # mu_scaled, L_scaled = mu / sf2**0.5, L / sf2**0.5 mu_scaled, L_scaled = mu / sf2, L / sf2 X = m + s * eps_NQ U = mu_scaled + L_scaled.dot(eps_MK) Kmn = ker.RBF(sf2, l, Z, X) if not linear_model else ker.LIN(sl2, Z, X) Knn = ker.RBFnn(sf2, l, X) if not linear_model else ker.LINnn(sl2, X) B = Knn - T.sum(Kmn * KmmInv.dot(Kmn), 0) F = Kmn.T.dot(U) + T.maximum(B, 1e-16)[:,None]**0.5 * eps_NK F = T.concatenate((T.zeros((N,1)), F), axis=1) S = T.nnet.softmax(F) LS = T.sum(T.log(T.maximum(T.sum(Y * S, 1), 1e-16))) if not linear_model: KL_U = -0.5 * (T.sum(Kmm.T * T.sum(mu_scaled[:,None,:]*mu_scaled[None,:,:], 2)) + K * (T.sum(Kmm.T * L_scaled.dot(L_scaled.T)) - M - 2.0*T.sum(T.log(T.diag(L_scaled))) - 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm)))))) else: KL_U = 0 KL_X_all = -0.5 * T.sum((m**2.0 + s**2.0)/sx2 - 1.0 - 2.0*ls + T.log(sx2), 1) KL_X = T.sum(KL_X_all) print 'Compiling...' 
inputs = {'Y': Y, 'Z': Z, 'm': m, 'ls': ls, 'mu': mu, 'lL': lL, 'lhyp': lhyp, 'KmmInv': KmmInv, 'eps_MK': eps_MK, 'eps_NQ': eps_NQ, 'eps_NK': eps_NK} z = 0.0*sum([T.sum(v) for v in inputs.values()]) # solve a bug with derivative wrt inputs not in the graph f = zip(['X', 'U', 'S', 'LS', 'KL_U', 'KL_X', 'KL_X_all'], [X, U, S, LS, KL_U, KL_X, KL_X_all]) self.f = {n: theano.function(inputs.values(), f+z, name=n, on_unused_input='ignore') for n,f in f} g = zip(['LS', 'KL_U', 'KL_X'], [LS, KL_U, KL_X]) wrt = {'Z': Z, 'm': m, 'ls': ls, 'mu': mu, 'lL': lL, 'lhyp': lhyp, 'KmmInv': KmmInv} self.g = {vn: {gn: theano.function(inputs.values(), T.grad(gv+z, vv), name='d'+gn+'_d'+vn, on_unused_input='ignore') for gn,gv in g} for vn, vv in wrt.iteritems()} with open(model_file_name, 'wb') as file_handle: print 'Saving model...' sys.setrecursionlimit(2000) cPickle.dump([self.f, self.g, self.f_Kmm, self.f_KmmInv, self.dKmm_d], file_handle, protocol=cPickle.HIGHEST_PROTOCOL)
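# --- Hypothetical sketch (not part of the original file) --------------------
# update_KmmInv_cache() is called above but not defined in this snippet.  A
# minimal sketch of what it presumably does, assuming self.params is a dict of
# NumPy arrays keyed by 'Z' and 'lhyp'; this is an assumption, not the
# original code.
def update_KmmInv_cache(self):
    # Re-evaluate the compiled inverse-kernel graph at the current numerical
    # parameter values and cache the result for later gradient evaluations.
    self.KmmInv = self.f_KmmInv(self.params['Z'], self.params['lhyp'])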