Example no. 1
 def build_theano_models(self, algo, algo_params):
     epsilon = 1e-6
     kl = lambda mu, sig: sig+mu**2-TT.log(sig)
     X, y = TT.dmatrices('X', 'y')
     params = TT.dvector('params')
     a, b, c, l_F, F, l_FC, FC = self.unpack_params(params)
     sig2_n, sig_f = TT.exp(2*a), TT.exp(b)
     l_FF = TT.dot(X, l_F)+l_FC
     FF = TT.concatenate((l_FF, TT.dot(X, F)+FC), 1)
     Phi = TT.concatenate((TT.cos(FF), TT.sin(FF)), 1)
     Phi = sig_f*TT.sqrt(2./self.M)*Phi
     noise = TT.log(1+TT.exp(c))
     PhiTPhi = TT.dot(Phi.T, Phi)
     A = PhiTPhi+(sig2_n+epsilon)*TT.identity_like(PhiTPhi)
     L = Tlin.cholesky(A)
     Li = Tlin.matrix_inverse(L)
     PhiTy = Phi.T.dot(y)
     beta = TT.dot(Li, PhiTy)
     alpha = TT.dot(Li.T, beta)
     mu_f = TT.dot(Phi, alpha)
     var_f = (TT.dot(Phi, Li.T)**2).sum(1)[:, None]
     dsp = noise*(var_f+1)
     mu_l = TT.sum(TT.mean(l_F, axis=1))
     sig_l = TT.sum(TT.std(l_F, axis=1))
     mu_w = TT.sum(TT.mean(F, axis=1))
     sig_w = TT.sum(TT.std(F, axis=1))
     hermgauss = np.polynomial.hermite.hermgauss(30)
     herm_x = Ts(hermgauss[0])[None, None, :]
     herm_w = Ts(hermgauss[1]/np.sqrt(np.pi))[None, None, :]
     herm_f = TT.sqrt(2*var_f[:, :, None])*herm_x+mu_f[:, :, None]
     nlk = (0.5*herm_f**2.-y[:, :, None]*herm_f)/dsp[:, :, None]+0.5*(
         TT.log(2*np.pi*dsp[:, :, None])+y[:, :, None]**2/dsp[:, :, None])
     enll = herm_w*nlk
     nlml = 2*TT.log(TT.diagonal(L)).sum()+2*enll.sum()+1./sig2_n*(
         (y**2).sum()-(beta**2).sum())+2*(X.shape[0]-self.M)*a
     penalty = (kl(mu_w, sig_w)*self.M+kl(mu_l, sig_l)*self.S)/(self.S+self.M)
     cost = (nlml+penalty)/X.shape[0]
     grads = TT.grad(cost, params)
     updates = getattr(OPT, algo)(self.params, grads, **algo_params)
     updates = getattr(OPT, 'apply_nesterov_momentum')(updates, momentum=0.9)
     train_inputs = [X, y]
     train_outputs = [cost, alpha, Li]
     self.train_func = Tf(train_inputs, train_outputs,
         givens=[(params, self.params)])
     self.train_iter_func = Tf(train_inputs, train_outputs,
         givens=[(params, self.params)], updates=updates)
     Xs, Li, alpha = TT.dmatrices('Xs', 'Li', 'alpha')
     l_FFs = TT.dot(Xs, l_F)+l_FC
     FFs = TT.concatenate((l_FFs, TT.dot(Xs, F)+FC), 1)
     Phis = TT.concatenate((TT.cos(FFs), TT.sin(FFs)), 1)
     Phis = sig_f*TT.sqrt(2./self.M)*Phis
     mu_pred = TT.dot(Phis, alpha)
     std_pred = (noise*(1+(TT.dot(Phis, Li.T)**2).sum(1)))**0.5
     pred_inputs = [Xs, alpha, Li]
     pred_outputs = [mu_pred, std_pred]
     self.pred_func = Tf(pred_inputs, pred_outputs,
         givens=[(params, self.params)])
Example no. 2
    def build_ann(self, weights, biases, layer_sizes=[784, 400, 10],
                  activation=[Tann.sigmoid, Tann.sigmoid, Tann.sigmoid]):
        """
        Builds a neural network with topology from the layer_sizes.
        :parameter activation is the activation function for the network
        :parameter rand_limit_min is the minimum limit for random initialization of weights for all layers
        :parameter rand_limit_max is the maximum limit for random initialization of weights for all layers
        """
        params = []
        inputs, answers = T.dmatrices('input', 'answers')
        assert len(layer_sizes) >= 2

        # Builds the layers
        for i in range(len(layer_sizes) - 1):
            layer = HiddenLayer(inputs, layer_sizes[i], layer_sizes[i + 1], weights[i], biases[i],
                                activation=activation[i])
            params.append(layer.W)
            params.append(layer.b)
            self.layers.append(layer)

        # Sets up the activation functions through the network
        layer = self.layers[0]
        previous_out = layer.activation(T.dot(layer.input, layer.W) + layer.b)
        x_h_out = layer.activation(T.dot(layer.input, layer.W) + layer.b)
        for i in range(len(self.layers) - 1):
            layer = self.layers[i + 1]
            x_h_out = layer.activation(T.dot(previous_out, layer.W) + layer.b)
            previous_out = x_h_out
        self.predictor = theano.function([inputs], [x_h_out])  # Activate
Example no. 3
    def createGradientFunctions(self):
        #create
        X = T.dmatrices("X")
        mu, logSigma, u, v, f, R = T.dcols("mu", "logSigma", "u", "v", "f", "R")
        mu = sharedX( np.random.normal(10, 10, (self.dimTheta, 1)), name='mu') 
        logSigma = sharedX(np.random.uniform(0, 4, (self.dimTheta, 1)), name='logSigma')
        logLambd = sharedX(np.matrix(np.random.uniform(0, 10)),name='logLambd')
        logLambd = T.patternbroadcast(T.dmatrix("logLambd"),[1,1])
        negKL = 0.5 * T.sum(1 + 2*logSigma - mu ** 2 - T.exp(logSigma) ** 2)
        theta = mu+T.exp(logSigma)*v
        W=theta
        y=X[:,0]
        X_sim=X[:,1:]
        f = (T.dot(X_sim,W)+u).flatten()
        
        gradvariables = [mu, logSigma, logLambd]
        
        
        logLike = T.sum(-(0.5 * np.log(2 * np.pi) + logLambd) - 0.5 * ((y-f)/(T.exp(logLambd)))**2)

        logp = (negKL + logLike)/self.m

        optimizer = -logp
        
        self.negKL = th.function([mu, logSigma], negKL, on_unused_input='ignore')
        self.f = th.function(gradvariables + [X,u,v], f, on_unused_input='ignore')
        self.logLike = th.function(gradvariables + [X, u, v], logLike,on_unused_input='ignore')
        derivatives = T.grad(logp,gradvariables)
        derivatives.append(logp)

        self.gradientfunction = th.function(gradvariables + [X, u, v], derivatives, on_unused_input='ignore')
        self.lowerboundfunction = th.function(gradvariables + [X, u, v], logp, on_unused_input='ignore')

        self.optimizer = BatchGradientDescent(objective=optimizer, params=gradvariables,inputs = [X,u,v],conjugate=True,max_iter=1)
Example no. 4
 def build_ann(self, layer_sizes=[784, 24, 10], activation=Tann.sigmoid, rand_limit_min=-.1, rand_limit_max=.1):
     """
     Builds a neural network with topology from the layer_sizes.
     :parameter activation is the activation function for the network
     :parameter rand_limit_min is the minimum limit for random initialization of weights for all layers
     :parameter rand_limit_max is the maximum limit for random initialization of weights for all layers
     """
     params = []
     inputs, answers = T.dmatrices('input', 'answers')
     assert len(layer_sizes) >= 2
     for i in range(len(layer_sizes) - 1):
         layer = HiddenLayer(inputs, layer_sizes[i], layer_sizes[i + 1], activation=activation, rand_limit_min=rand_limit_min, rand_limit_max=rand_limit_max)
         # outputs.append(layer.output)
         params.append(layer.W)
         params.append(layer.b)
         self.layers.append(layer)
     previous_out = self.layers[0].output
     x_h_out = self.layers[0].output
     for i in range(len(self.layers)-1):
         layer = self.layers[i+1]
         x_h_out = Tann.sigmoid(T.dot(previous_out, layer.W) + layer.b)
         previous_out = x_h_out
     error = T.sum((answers - x_h_out) ** 2)
     gradients = T.grad(error, params)
     backprop_acts = [(p, p - self.lrate * g) for p, g in zip(params, gradients)]
     self.predictor = theano.function([inputs], [x_h_out])
     self.trainer = theano.function([inputs, answers], error, updates=backprop_acts)
Example no. 5
def compile_theano_functions():
    """
    Returns compiled theano functions.  
    
    Notes
    -----
    Originally used to speed up multiplication of large matrices and vectors.  Caused a strange
    issue in nipype where nipype unnecessarily reran nodes that use these compiled functions.
    Not used in the current implementation.
    """
    import theano.tensor as T
    import theano
    
    def TnormCols(X):
        """
        Theano expression which centers and normalizes columns of X `||x_i|| = 1`
        """
        Xc = X - X.mean(0)
        return Xc/T.sqrt( (Xc**2.).sum(0) )
    
    def TzscorrCols(Xn):
        """
        Theano expression which returns Fisher transformed correlation values between columns of a
        normalized input, `X_n`.  Diagonal is set to zero.
        """
        C_X = T.dot(Xn.T, Xn)-T.eye(Xn.shape[1])
        return 0.5*T.log((1+C_X)/(1-C_X))
    
    X,Y = T.dmatrices('X','Y')
    tdot = theano.function([X,Y], T.dot(X,Y))
    tnormcols = theano.function([X], TnormCols(X))

    return tdot, tnormcols
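A minimal usage sketch (not part of the original snippet; assumes NumPy is available): the two compiled functions returned above can be called directly on double-precision arrays.

import numpy as np

tdot, tnormcols = compile_theano_functions()

A = np.random.randn(100, 20)
B = np.random.randn(20, 5)

C = tdot(A, B)        # matrix product computed by the compiled Theano graph, shape (100, 5)
An = tnormcols(A)     # columns of A centered and scaled to unit L2 norm
print(np.allclose((An ** 2).sum(axis=0), 1.0))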
Example no. 6
    def createObjectiveFunction(self):
        '''
        @description: initialize objective function and minimization function
        @X,y data matrix/vector
        @u random noise for simulator
        @v standard normal for reparametrization trick
        '''
        X, u = T.dmatrices("X", "u")
        f, y, v = T.dcols("f", "y", "v")

        mu = self.params[0]
        logSigma = self.params[1]
        logLambda = sharedX(np.log(self.sigma_e), name='logLambda')
        #logLambda = self.params[2]

        negKL = 0.5 * self.dimTheta + 0.5 * T.sum(2 * logSigma - mu**2 -
                                                  T.exp(logSigma)**2)
        f = self.regression_simulator(X, u, v, mu, logSigma)

        logLike = -self.m * (0.5 * np.log(2 * np.pi) +
                             logLambda) - 0.5 * T.sum(
                                 (y - f)**2) / (T.exp(logLambda)**2) / self.Lu

        elbo = (negKL + logLike)
        obj = -elbo
        self.lowerboundfunction = th.function([X, y, u, v],
                                              obj,
                                              on_unused_input='ignore')
        derivatives = T.grad(obj, self.params)
        self.gradientfunction = th.function([X, y, u, v],
                                            derivatives,
                                            on_unused_input='ignore')
Example no. 7
def compute_more_than_one():
    a,b = T.dmatrices('a','b')
    diff = a - b
    abs_diff = abs(diff)
    diff_sq = diff**2
    f = theano.function([a,b],[diff, abs_diff, diff_sq])
    print f([[0,0],[1,2]], [[2,3],[4,1]])
Example no. 8
 def __init__(self, n_x, n_h, n_y, lr=0, nonlinear='softplus', valid_x=None, valid_y=None):
     print 'PL', n_x, n_h, n_y, lr, nonlinear
     if lr == 0: lr = 10. / n_h
     self.lr = lr
     self.fitted = False
     self.n_x = n_x
     self.n_h = n_h
     self.n_y = n_y
     self.nonlinear = nonlinear
     self.valid_x = valid_x
     self.valid_y = valid_y
     
     if self.nonlinear == 'softplus':
         def g(_x): return T.log(T.exp(_x) + 1)
     else:
         raise Exception()
     
     # Define Theano computational graph
     x, y, w1, b1, w2, b2, A = T.dmatrices('x', 'y', 'w1', 'b1', 'w2', 'b2', 'A')
     h1 = g(T.dot(w1, x) + T.dot(b1, A))
     h2 = g(T.dot(w2, h1) + T.dot(b2, A))
     p = T.nnet.softmax(h2.T).T
     logpy = (- T.nnet.categorical_crossentropy(p.T, y.T).T).reshape((1,-1))
     dlogpy_dw = T.grad(logpy.sum(), [w1, b1, w2, b2])
     H = T.nnet.categorical_crossentropy(p.T, p.T).T #entropy
     dH_dw = T.grad(H.sum(), [w1, b1, w2, b2])
     
     # Define functions to call
     self.f_p = theano.function([x, w1, b1, w2, b2, A], p)
     self.f_dlogpy_dw = theano.function([x, y, w1, b1, w2, b2, A], [logpy] + dlogpy_dw)
     self.f_dH_dw = theano.function([x, w1, b1, w2, b2, A], [H] + dH_dw)
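A hypothetical call sketch (not from the original source). It assumes the class is named PL (as the print statement suggests), that x is laid out as (n_x, n_batch) with weights w1: (n_h, n_x) and w2: (n_y, n_h), that the biases are column vectors, and that A is a (1, n_batch) matrix of ones so that T.dot(b1, A) tiles each bias across the minibatch.

import numpy as np

n_x, n_h, n_y, n_batch = 784, 100, 10, 32
pl = PL(n_x, n_h, n_y)                 # hypothetical instantiation of the class above

x = np.random.randn(n_x, n_batch)
w1 = 0.01 * np.random.randn(n_h, n_x)
b1 = np.zeros((n_h, 1))
w2 = 0.01 * np.random.randn(n_y, n_h)
b2 = np.zeros((n_y, 1))
A = np.ones((1, n_batch))              # broadcasts the column biases over the batch

p = pl.f_p(x, w1, b1, w2, b2, A)       # class probabilities, shape (n_y, n_batch)
print(p.sum(axis=0))                   # each column sums to 1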
Example no. 9
    def createObjectiveFunction(self):
        '''
        @description: initialize objective function and minimization function
        @X,y data matrix/vector
        @u random noise for simulator
        @v standard normal for reparametrization trick
        '''
        X,u = T.dmatrices("X","u")
        f, y, v = T.dcols("f", "y", "v")
        
        mu = self.params[0]
        logSigma = self.params[1]
        logLambda = sharedX(np.log(self.sigma_e),name='logLambda')
        #logLambda = self.params[2]

        negKL = 0.5*self.dimTheta+0.5*T.sum(2*logSigma - mu ** 2 - T.exp(logSigma) ** 2)
        f = self.regression_simulator(X,u,v,mu,logSigma)

        logLike = -self.m*(0.5 * np.log(2 * np.pi) + logLambda)-0.5*T.sum((y-f)**2)/(T.exp(logLambda)**2)/self.Lu

        elbo = (negKL + logLike)
        obj = -elbo
        self.lowerboundfunction = th.function([X, y, u, v], obj, on_unused_input='ignore')
        derivatives = T.grad(obj,self.params)
        self.gradientfunction = th.function([X,y,u,v], derivatives, on_unused_input='ignore')
Example no. 10
def compile_theano_functions():
    """
    Returns compiled theano functions.  
    
    Notes
    -----
    Originally used to speed up multiplication of large matrices and vectors.  Caused a strange
    issue in nipype where nipype unnecessarily reran nodes that use these compiled functions.
    Not used in the current implementation.
    """
    import theano.tensor as T
    import theano

    def TnormCols(X):
        """
        Theano expression which centers and normalizes columns of X `||x_i|| = 1`
        """
        Xc = X - X.mean(0)
        return Xc / T.sqrt((Xc**2.).sum(0))

    def TzscorrCols(Xn):
        """
        Theano expression which returns Fisher transformed correlation values between columns of a
        normalized input, `X_n`.  Diagonal is set to zero.
        """
        C_X = T.dot(Xn.T, Xn) - T.eye(Xn.shape[1])
        return 0.5 * T.log((1 + C_X) / (1 - C_X))

    X, Y = T.dmatrices('X', 'Y')
    tdot = theano.function([X, Y], T.dot(X, Y))
    tnormcols = theano.function([X], TnormCols(X))

    return tdot, tnormcols
Example no. 11
def multipleThingAtTheSameTime(a, b):
    x, y = T.dmatrices('x', 'y')
    diff = x - y
    abs_diff = abs(diff)
    diff_squared = diff**2
    summ = x + y
    f = th.function([x,y], [diff, abs_diff, diff_squared, summ])
    print(f(a, b))
Example no. 12
    def createGradientFunctions(self):
        #Create the Theano variables
        W1,W2,W3,W4,W5,W6,x,eps = T.dmatrices("W1","W2","W3","W4","W5","W6","x","eps")
        #Create biases as cols so they can be broadcasted for minibatches
        b1,b2,b3,b4,b5,b6 = T.dcols("b1","b2","b3","b4","b5","b6")
        z1 = T.col("z1")
        if self.continuous:
            #convolve x
            # no_filters = 100, stride = 4, filter_size = 50

            h_encoder = T.tanh(T.dot(W1,x) + b1)
            #h_encoder = T.dot(W1,x) + b1
        else:   
            h_encoder = T.tanh(T.dot(W1,x) + b1)

        mu_encoder = T.dot(W2,h_encoder) + b2
        log_sigma_encoder = 0.5*(T.dot(W3,h_encoder) + b3)

        #Find the hidden variable z
        z = mu_encoder + T.exp(log_sigma_encoder)*eps

        prior = 0.5* T.sum(1 + 2*log_sigma_encoder - mu_encoder**2 - T.exp(2*log_sigma_encoder))


        #Set up decoding layer
        if self.continuous:
            h_decoder = T.nnet.softplus(T.dot(W4,z) + b4)
            h_dec = T.nnet.softplus(T.dot(W4,z1) + b4)

            #h_decoder = T.dot(W4,z) + b4
            #h_dec = T.dot(W4,z1) + b4

            mu_decoder = T.tanh(T.dot(W5,h_decoder) + b5)
            mu_dec = T.tanh(T.dot(W5,h_dec) + b5)
            log_sigma_decoder = 0.5*(T.dot(W6,h_decoder) + b6)
            logpxz = T.sum(-(0.5 * np.log(2 * np.pi) + log_sigma_decoder) - 0.5 * ((x - mu_decoder) / T.exp(log_sigma_decoder))**2)
            gradvariables = [W1,W2,W3,W4,W5,W6,b1,b2,b3,b4,b5,b6]
        else:
            h_decoder = T.tanh(T.dot(W4,z) + b4)
            y = T.nnet.sigmoid(T.dot(W5,h_decoder) + b5)
            logpxz = -T.nnet.binary_crossentropy(y,x).sum()
            gradvariables = [W1,W2,W3,W4,W5,b1,b2,b3,b4,b5]
        logp = logpxz + prior

        #Compute all the gradients
        derivatives = T.grad(logp,gradvariables)

        #Add the lowerbound so we can keep track of results
        derivatives.append(logp)
        
        self.get_z = th.function(gradvariables+[x,eps],z,on_unused_input='ignore')
        self.generate = th.function(gradvariables+[z1,x,eps],mu_dec,on_unused_input='ignore')
        self.predict = th.function(gradvariables+[x,eps],mu_decoder,on_unused_input='ignore')
        self.gradientfunction = th.function(gradvariables + [x,eps], derivatives, on_unused_input='ignore')
        self.lowerboundfunction = th.function(gradvariables + [x,eps], logp, on_unused_input='ignore')
Example no. 13
    def __init__(self, Q, D, layers, order, D_cum_sum, N, M, non_rec):
        try:
            print('Trying to load model...')
            with open('model_SV1.save', 'rb') as file_handle:
                self.f, self.g = pickle.load(file_handle)
                print('Loaded!')
            return
        except:
            print('Failed. Creating a new model...')

        print('Setting up variables...')
        hyp, SIGMA_S, U, b, MU_S = T.dmatrices('hyp', 'SIGMA_S', 'U', 'b','MU_S')
        y, MEAN_MAP, sn, sf = T.dvectors('y','MEAN_MAP','sn','sf')
        w = T.dscalars('w')
        if Q > 1:
            X = T.dmatrix('X')
        else:
            X = T.dvector('X')     
        if layers > 1:
            MU, SIGMA = T.dmatrices('MU', 'SIGMA')
        else:
            MU, SIGMA = T.dvectors('MU', 'SIGMA')        
        
        SIGMA_trf, SIGMA_S_trf = T.log(1+T.exp(SIGMA))**2, T.log(1+T.exp(SIGMA_S))**2       
        sf_trf, sn_trf, lengthscale_trf, lengthscale_p_trf  =  T.log(1 + T.exp(sf))**2, T.log(1 + T.exp(sn))**2, T.log(1 + T.exp(hyp[:,0])), T.log(1 + T.exp(hyp[:,1]))
        
        print('Setting up model...')
        LL, KL = self.get_model(w, lengthscale_trf, lengthscale_p_trf, sn_trf, sf_trf, MU_S, SIGMA_S_trf, MU, SIGMA_trf, U, b, X, y, MEAN_MAP, Q, D, D_cum_sum, layers, order, non_rec, N, M)

        print('Compiling model...')
        
        inputs = {'X': X, 'MU': MU, 'SIGMA': SIGMA, 'MU_S': MU_S, 'SIGMA_S': SIGMA_S, 'U':  U, 'b':  b, 'hyp': hyp, 'y': y, 'MEAN_MAP': MEAN_MAP, 'sn': sn, 'sf': sf, 'w': w}
        z = 0.0 * sum([T.sum(v) for v in inputs.values()]) # solve a bug with derivative wrt inputs not in the graph
        f = {'LL': LL, 'KL': KL}
        self.f = {fn: theano.function(list(inputs.values()), fv+z, name=fn, on_unused_input='ignore') for fn,fv in f.items()}  
                  
        g = {'LL': LL, 'KL': KL}
        wrt = {'MU': MU, 'SIGMA': SIGMA, 'MU_S': MU_S, 'SIGMA_S': SIGMA_S, 'U':  U, 'b':  b, 'hyp': hyp, 'MEAN_MAP': MEAN_MAP,  'sn': sn, 'sf': sf, 'w': w}
        self.g = {vn: {gn: theano.function(list(inputs.values()), T.grad(gv+z, vv), name='d'+gn+'_d'+vn, on_unused_input='ignore') for gn,gv in g.items()} for vn, vv in wrt.items()}


        with open('model_SV1.save', 'wb') as file_handle:
            print('Saving model...')
            sys.setrecursionlimit(100000)
            pickle.dump([self.f, self.g], file_handle, protocol=pickle.HIGHEST_PROTOCOL)
Example no. 14
def multiple_input_output():
    # multiple input, multiple output
    a, b = T.dmatrices('a', 'b') # plural
    diff = a - b
    abs_diff = abs(diff)
    diff_squared = diff**2
    f = function([a, b], [diff, abs_diff, diff_squared])
    print '\nmultiple input, multiple output'
    print f([[1, 1], [1, 1]], [[0, 1], [2, 3]])
Example no. 15
 def test_examples_3(self):
     a, b = T.dmatrices('a', 'b')
     diff = a - b
     abs_diff = abs(diff)
     diff_squared = diff**2
     f = function([a, b], [diff, abs_diff, diff_squared])
     elems = f([[1, 1], [1, 1]], [[0, 1], [2, 3]])
     assert numpy.all(elems[0] == array([[1., 0.], [-1., -2.]]))
     assert numpy.all(elems[1] == array([[1., 0.], [1., 2.]]))
     assert numpy.all(elems[2] == array([[1., 0.], [1., 4.]]))
Example no. 16
 def test_examples_3(self):
     a, b = T.dmatrices('a', 'b')
     diff         = a - b
     abs_diff     = abs(diff)
     diff_squared = diff**2
     f = function([a, b], [diff, abs_diff, diff_squared])
     elems = f([[1, 1], [1, 1]], [[0, 1], [2, 3]])
     assert numpy.all( elems[0] == array([[ 1.,  0.],[-1., -2.]]))
     assert numpy.all( elems[1] == array([[ 1.,  0.],[ 1.,  2.]]))
     assert numpy.all( elems[2] == array([[ 1.,  0.],[ 1.,  4.]]))
Example no. 17
    def variables(self):
        
        # Define parameters 'w'
        v = {}
        v['w0x'], v['w0y'] = T.dmatrices('w0x','w0y')
        v['b0'] = T.dmatrix('b0')
        for i in range(1, len(self.n_hidden_q)):
            v['w'+str(i)] = T.dmatrix('w'+str(i))
            v['b'+str(i)] = T.dmatrix('b'+str(i))
        v['mean_w'] = T.dmatrix('mean_w')
        v['mean_b'] = T.dmatrix('mean_b')
        if self.type_qz in ['gaussian','gaussianmarg']:
            v['logvar_w'] = T.dmatrix('logvar_w')
        v['logvar_b'] = T.dmatrix('logvar_b')
        
        w = {}
        w['w0y'], w['w0z'] = T.dmatrices('w0y','w0z')
        w['b0'] = T.dmatrix('b0')
        for i in range(1, len(self.n_hidden_p)):
            w['w'+str(i)] = T.dmatrix('w'+str(i))
            w['b'+str(i)] = T.dmatrix('b'+str(i))
        w['out_w'] = T.dmatrix('out_w')
        w['out_b'] = T.dmatrix('out_b')
        
        if self.type_px == 'sigmoidgaussian' or self.type_px == 'gaussian':
            w['out_logvar_w'] = T.dmatrix('out_logvar_w')
            w['out_logvar_b'] = T.dmatrix('out_logvar_b')
        
        w['logpy'] = T.dmatrix('logpy')
        
        if self.type_pz == 'studentt':
            w['logv'] = T.dmatrix('logv')

        # Define latent variables 'z'
        z = {'eps': T.dmatrix('eps')}
        
        # Define observed variables 'x'
        x = {}
        x['x'] = T.dmatrix('x')
        x['y'] = T.dmatrix('y')
        
        return v, w, x, z
Example no. 18
    def variables(self):

        # Define parameters 'w'
        v = {}
        v['w0x'], v['w0y'] = T.dmatrices('w0x', 'w0y')
        v['b0'] = T.dmatrix('b0')
        for i in range(1, len(self.n_hidden_q)):
            v['w' + str(i)] = T.dmatrix('w' + str(i))
            v['b' + str(i)] = T.dmatrix('b' + str(i))
        v['mean_w'] = T.dmatrix('mean_w')
        v['mean_b'] = T.dmatrix('mean_b')
        if self.type_qz in ['gaussian', 'gaussianmarg']:
            v['logvar_w'] = T.dmatrix('logvar_w')
        v['logvar_b'] = T.dmatrix('logvar_b')

        w = {}
        w['w0y'], w['w0z'] = T.dmatrices('w0y', 'w0z')
        w['b0'] = T.dmatrix('b0')
        for i in range(1, len(self.n_hidden_p)):
            w['w' + str(i)] = T.dmatrix('w' + str(i))
            w['b' + str(i)] = T.dmatrix('b' + str(i))
        w['out_w'] = T.dmatrix('out_w')
        w['out_b'] = T.dmatrix('out_b')

        if self.type_px == 'sigmoidgaussian' or self.type_px == 'gaussian':
            w['out_logvar_w'] = T.dmatrix('out_logvar_w')
            w['out_logvar_b'] = T.dmatrix('out_logvar_b')

        w['logpy'] = T.dmatrix('logpy')

        if self.type_pz == 'studentt':
            w['logv'] = T.dmatrix('logv')

        # Define latent variables 'z'
        z = {'eps': T.dmatrix('eps')}

        # Define observed variables 'x'
        x = {}
        x['x'] = T.dmatrix('x')
        x['y'] = T.dmatrix('y')

        return v, w, x, z
Example no. 19
    def createGradientFunctions(self):
        #Create the Theano variables
        W1, W2, W3, W4, W5, W6, x, eps = T.dmatrices("W1", "W2", "W3", "W4",
                                                     "W5", "W6", "x", "eps")

        #Create biases as cols so they can be broadcasted for minibatches
        b1, b2, b3, b4, b5, b6 = T.dcols("b1", "b2", "b3", "b4", "b5", "b6")

        if self.continuous:
            h_encoder = T.nnet.softplus(T.dot(W1, x) + b1)
        else:
            h_encoder = T.tanh(T.dot(W1, x) + b1)

        mu_encoder = T.dot(W2, h_encoder) + b2
        log_sigma_encoder = 0.5 * (T.dot(W3, h_encoder) + b3)

        #Find the hidden variable z
        z = mu_encoder + T.exp(log_sigma_encoder) * eps

        prior = 0.5 * T.sum(1 + 2 * log_sigma_encoder - mu_encoder**2 -
                            T.exp(2 * log_sigma_encoder))

        #Set up decoding layer
        if self.continuous:
            h_decoder = T.nnet.softplus(T.dot(W4, z) + b4)
            mu_decoder = T.nnet.sigmoid(T.dot(W5, h_decoder) + b5)
            log_sigma_decoder = 0.5 * (T.dot(W6, h_decoder) + b6)
            logpxz = T.sum(-(0.5 * np.log(2 * np.pi) + log_sigma_decoder) -
                           0.5 *
                           ((x - mu_decoder) / T.exp(log_sigma_decoder))**2)
            gradvariables = [W1, W2, W3, W4, W5, W6, b1, b2, b3, b4, b5, b6]
        else:
            h_decoder = T.tanh(T.dot(W4, z) + b4)
            y = T.nnet.sigmoid(T.dot(W5, h_decoder) + b5)
            logpxz = -T.nnet.binary_crossentropy(y, x).sum()
            gradvariables = [W1, W2, W3, W4, W5, b1, b2, b3, b4, b5]

        logp = logpxz + prior

        #Compute all the gradients
        derivatives = T.grad(logp, gradvariables)

        #Add the lowerbound so we can keep track of results
        derivatives.append(logp)

        self.gradientfunction = th.function(gradvariables + [x, eps],
                                            derivatives,
                                            on_unused_input='ignore')
        self.lowerboundfunction = th.function(gradvariables + [x, eps],
                                              logp,
                                              on_unused_input='ignore')
        self.zfunction = th.function(gradvariables + [x, eps],
                                     z,
                                     on_unused_input='ignore')
Example no. 20
def test_1_examples_compute_more_than_1_return_value():
    a, b = T.dmatrices('a', 'b')
    diff = a - b
    abs_diff = abs(diff)
    diff_squared = diff**2
    f = theano.function([a, b], [diff, abs_diff, diff_squared])

    diff_res, abs_res, diff_squared_res = f([[1, 1], [1, 1]], [[0, 0], [2, 2]])
    np.testing.assert_array_almost_equal(diff_res, [[1, 1], [-1, -1]])
    np.testing.assert_array_almost_equal(abs_res, [[1, 1], [1, 1]])
    np.testing.assert_array_almost_equal(diff_squared_res, [[1, 1], [1, 1]])
Example no. 21
 def setUp(self):
     test_in_1 = InputLayer((None, None))
     test_in_2 = InputLayer((None, None))
     self.l = CosineSimilarityLayer(test_in_1, test_in_2)
     in1, in2 = T.dmatrices('in1', 'in2')
     pred_out = layers.get_output(self.l,
                                  inputs={
                                      test_in_1: in1,
                                      test_in_2: in2
                                  })
     self.fn = theano.function([in1, in2], pred_out)
Example no. 22
    def createGradientFunctions(self):
        #create
        X = T.dmatrices("X")
        mu, logSigma, u, v, f, R = T.dcols("mu", "logSigma", "u", "v", "f",
                                           "R")
        mu = sharedX(np.random.normal(10, 10, (self.dimTheta, 1)), name='mu')
        logSigma = sharedX(np.random.uniform(0, 4, (self.dimTheta, 1)),
                           name='logSigma')
        logLambd = sharedX(np.matrix(np.random.uniform(0, 10)),
                           name='logLambd')
        logLambd = T.patternbroadcast(T.dmatrix("logLambd"), [1, 1])
        negKL = 0.5 * T.sum(1 + 2 * logSigma - mu**2 - T.exp(logSigma)**2)
        theta = mu + T.exp(logSigma) * v
        W = theta
        y = X[:, 0]
        X_sim = X[:, 1:]
        f = (T.dot(X_sim, W) + u).flatten()

        gradvariables = [mu, logSigma, logLambd]

        logLike = T.sum(-(0.5 * np.log(2 * np.pi) + logLambd) - 0.5 *
                        ((y - f) / (T.exp(logLambd)))**2)

        logp = (negKL + logLike) / self.m

        optimizer = -logp

        self.negKL = th.function([mu, logSigma],
                                 negKL,
                                 on_unused_input='ignore')
        self.f = th.function(gradvariables + [X, u, v],
                             f,
                             on_unused_input='ignore')
        self.logLike = th.function(gradvariables + [X, u, v],
                                   logLike,
                                   on_unused_input='ignore')
        derivatives = T.grad(logp, gradvariables)
        derivatives.append(logp)

        self.gradientfunction = th.function(gradvariables + [X, u, v],
                                            derivatives,
                                            on_unused_input='ignore')
        self.lowerboundfunction = th.function(gradvariables + [X, u, v],
                                              logp,
                                              on_unused_input='ignore')

        self.optimizer = BatchGradientDescent(objective=optimizer,
                                              params=gradvariables,
                                              inputs=[X, u, v],
                                              conjugate=True,
                                              max_iter=1)
Example no. 23
    def createGradientFunctions(self):
        #Create the Theano variables
        W1,W2,W3,W4,W5,W6,x,eps = T.dmatrices("W1","W2","W3","W4","W5","W6","x","eps")

        #Create biases as cols so they can be broadcasted for minibatches
        b1,b2,b3,b4,b5,b6,pi = T.dcols("b1","b2","b3","b4","b5","b6","pi")
        
        if self.continuous:
            h_encoder = T.nnet.softplus(T.dot(W1,x) + b1)
        else:   
            h_encoder = T.tanh(T.dot(W1,x) + b1)
        print type(pi)    
        rng = T.shared_randomstreams.RandomStreams(seed=124)
        i = rng.choice(size=(1,), a=self.num_model, p=T.nnet.softmax(pi.T).T.flatten())

        mu_encoder = T.dot(W2[i[0]*self.dimZ:(1+i[0])*self.dimZ],h_encoder) + b2[i[0]*self.dimZ:(1+i[0])*self.dimZ]
        log_sigma_encoder = (0.5*(T.dot(W3[i[0]*self.dimZ:(1+i[0])*self.dimZ],h_encoder)))+ b3[i[0]*self.dimZ:(1+i[0])*self.dimZ]

        z = mu_encoder + T.exp(log_sigma_encoder)*eps
     
        
        prior = 0
        for i in range(self.num_model):
            prior += T.exp(pi[i][0])*0.5* T.sum(1 + 2*log_sigma_encoder[int(i)*self.dimZ:(1+int(i))*self.dimZ] - mu_encoder[int(i)*self.dimZ:(1+int(i))*self.dimZ]**2 - T.exp(2*log_sigma_encoder[int(i)*self.dimZ:(1+int(i))*self.dimZ]))
        prior /= T.sum(T.exp(pi))
        #Set up decoding layer
        if self.continuous:
            h_decoder = T.nnet.softplus(T.dot(W4,z) + b4)
            mu_decoder = T.nnet.sigmoid(T.dot(W5,h_decoder) + b5)
            log_sigma_decoder = 0.5*(T.dot(W6,h_decoder) + b6)
            logpxz = T.sum(-(0.5 * np.log(2 * np.pi) + log_sigma_decoder) - 0.5 * ((x - mu_decoder) / T.exp(log_sigma_decoder))**2)
            gradvariables = [W1,W2,W3,W4,W5,W6,b1,b2,b3,b4,b5,b6,pi]
        else:
            h_decoder = T.tanh(T.dot(W4,z) + b4)
            y = T.nnet.sigmoid(T.dot(W5,h_decoder) + b5)
            logpxz = -T.nnet.binary_crossentropy(y,x).sum()
            gradvariables = [W1,W2,W3,W4,W5,b1,b2,b3,b4,b5,pi]


        logp = logpxz + prior

        #Compute all the gradients
        derivatives = T.grad(logp,gradvariables)

        #Add the lowerbound so we can keep track of results
        derivatives.append(logpxz)
        
        self.gradientfunction = th.function(gradvariables + [x,eps], derivatives, on_unused_input='ignore')
        self.lowerboundfunction = th.function(gradvariables + [x,eps], logp, on_unused_input='ignore')
        self.hiddenstatefunction = th.function(gradvariables + [x,eps], z, on_unused_input='ignore')
Example no. 24
def examine(sample, expected, layers):
    layer1 = layers[0]
    layer2 = layers[1]
    layer3 = layers[2]
    samplesize = sample.shape[0]
    x, y = T.dmatrices('x', 'y')
    firstoutput = 1 / (1 + T.exp(-T.dot(x, layer1.weights) - layer1.bias))
    secondoutput = 1 / (
        1 + T.exp(-T.dot(firstoutput, layer2.weights) - layer2.bias))
    finaloutput = T.dot(secondoutput, layer3.weights) + layer3.bias
    err = ((finaloutput - y)**2).sum()
    f = theano.function([x, y], err)
    outcome = f(sample, expected)
    result = outcome / samplesize
    return result
Example no. 25
def calc2elements():
    """
    一次计算两个输入元素。
    http://deeplearning.net/software/theano/tutorial/examples.html
    这是计算对数函数曲线的y值。输入一个矩阵,元素是x的取值,输出是与输入矩阵中元素对应的y值。
    """
    import theano.tensor as T
    from theano import pp
    a, b = T.dmatrices('a', 'b')
    diff = a - b
    abs_diff = abs(diff)
    diff_square = diff ** 2
    f = function([a, b], [diff, abs_diff, diff_square])
    diff, abs_diff, diff_square = f([[1, 1], [1, 1]], [[0, 1], [2, 3]])

    print (diff)
    print (abs_diff)
    print (diff_square)
Example no. 26
    def __init__(self,
            initial_params=None):
        print 'Setting up variables ...'
        # Parameters
        if initial_params is None:
            initial_params = {'mean':None,
                              'sigma_n':0.+np_uniform_scalar(0),
                              'sigma_f':0.+np_uniform_scalar(0),
                              'l_k':0.+np_uniform_scalar(0)}
        if initial_params['mean'] is None:
            self.mean = shared_scalar(0.)
            self.meanfunc = 'zero'
        else:
            self.mean = shared_scalar(initial_params['mean'])
            self.meanfunc = 'const'
        self.sigma_n = shared_scalar(initial_params['sigma_n'])
        self.sigma_f = shared_scalar(initial_params['sigma_f'])
        self.l_k = shared_scalar(initial_params['l_k'])
        
        # Variables
        X,Y,x_test = T.dmatrices('X','Y','x_test')

        print 'Setting up model ...'
        K, Ks, Kss, y_test_mu, y_test_var, log_likelihood,L,alpha,V,fs2,sW = self.get_model(X, Y, x_test)

        print 'Compiling model ...'
        inputs = {'X': X, 'Y': Y, 'x_test': x_test}
        # solve a bug with derivative wrt inputs not in the graph
        z = 0.0*sum([T.sum(v) for v in inputs.values()])
        f = zip(['K', 'Ks', 'Kss', 'y_test_mu', 'y_test_var', 'log_likelihood',
                 'L','alpha','V','fs2','sW'],
                [K, Ks, Kss, y_test_mu, y_test_var, log_likelihood,
                 L, alpha,V,fs2,sW])
        self.f = {n: theano.function(inputs.values(), f+z, name=n, on_unused_input='ignore')
                     for n, f in f}

        if self.meanfunc == 'zero':
            wrt = {'sigma_n':self.sigma_n, 'sigma_f':self.sigma_f, 'l_k':self.l_k}
        else:
            wrt = {'mean':self.mean,'sigma_n':self.sigma_n, 'sigma_f':self.sigma_f, 'l_k':self.l_k}
        
        self.g = {vn: theano.function(inputs.values(), T.grad(log_likelihood,vv),
                                      name=vn,on_unused_input='ignore')
                                      for vn, vv in wrt.iteritems()}
Example no. 27
def JacobiTimesVector():
    W, V = T.dmatrices(['W', 'V'])
    x = T.dvector('x')
    y = T.dot(x, W)
    JV = T.Rop(y, W, V)
    f = function([W, V, x], JV)
    print(f(
        [[1, 1], [1, 1]],
        [[2, 2], [2, 2]],
        [0, 1]
    ))

    v = T.dvector('v')
    VJ = T.Lop(y, W, v)
    fL = function([v, x], VJ)
    print(fL(
        [2, 2],
        [0, 1]
    ))
Example no. 28
def main():
    x = T.dmatrix('x')

    # T.exp
    s = 1 / (1 + T.exp(-x))
    logistic = function([x], s)

    # 0 is 0.5, negative < 0.5...
    print(logistic([[0, 1], [-1, -2]]))
    # logistic function can be expressed with hyperbolic tan term
    s2 = (1 + T.tanh(x / 2)) / 2
    logistic2 = function([x], s2)
    print(
        np.allclose(logistic([[0, 1], [-1, -2]]), logistic2([[0, 1], [-1,
                                                                      -2]])))

    # do more things at a time
    a, b = T.dmatrices('a', 'b')
    diff = a - b
    abs_diff = abs(diff)
    diff_squared = diff**2
    f = function([a, b], [diff, abs_diff, diff_squared])
    print(f([[1, 1], [1, 1]], [[0, 1], [2, 3]]))

    # default value
    x, y = T.dscalars('x', 'y')
    z = x + y
    f = function([x, In(y, value=1)], z)
    print(f(33))
    print(f(33, 2))

    # Inputs with default values must follow inputs without default
    # values (like Python’s functions). There can be multiple inputs
    # with default values. These parameters can be set positionally
    # or by name, as in standard Python
    x, y, w = T.dscalars('x', 'y', 'w')
    z = (x + y) * w
    f = function([x, In(y, value=1), In(w, value=2, name='w_by_name')], z)
    print(f(33))
    print(f(33, 2))
    print(f(33, 0, 1))
    print(f(33, w_by_name=1))
    print(f(33, w_by_name=1, y=0))
Example no. 29
    def __init__(self,
                 n_x,
                 n_h,
                 n_y,
                 lr=0,
                 nonlinear='softplus',
                 valid_x=None,
                 valid_y=None):
        print 'PL', n_x, n_h, n_y, lr, nonlinear
        if lr == 0: lr = 10. / n_h
        self.lr = lr
        self.fitted = False
        self.n_x = n_x
        self.n_h = n_h
        self.n_y = n_y
        self.nonlinear = nonlinear
        self.valid_x = valid_x
        self.valid_y = valid_y

        if self.nonlinear == 'softplus':

            def g(_x):
                return T.log(T.exp(_x) + 1)
        else:
            raise Exception()

        # Define Theano computational graph
        x, y, w1, b1, w2, b2, A = T.dmatrices('x', 'y', 'w1', 'b1', 'w2', 'b2',
                                              'A')
        h1 = g(T.dot(w1, x) + T.dot(b1, A))
        h2 = g(T.dot(w2, h1) + T.dot(b2, A))
        p = T.nnet.softmax(h2.T).T
        logpy = (-T.nnet.categorical_crossentropy(p.T, y.T).T).reshape((1, -1))
        dlogpy_dw = T.grad(logpy.sum(), [w1, b1, w2, b2])
        H = T.nnet.categorical_crossentropy(p.T, p.T).T  #entropy
        dH_dw = T.grad(H.sum(), [w1, b1, w2, b2])

        # Define functions to call
        self.f_p = theano.function([x, w1, b1, w2, b2, A], p)
        self.f_dlogpy_dw = theano.function([x, y, w1, b1, w2, b2, A],
                                           [logpy] + dlogpy_dw)
        self.f_dH_dw = theano.function([x, w1, b1, w2, b2, A], [H] + dH_dw)
Example no. 30
def _getModel():
    s1, s2 = T.dvectors('s1', 's2')
    t1, t2 = T.dmatrices('t1', 't2')
    gw = T.dvector('gw')
    prank = T.dvector('prank')

    r1 = T.dot(t1, prank)
    r2 = T.dot(t2, prank)

    erd = T.exp(r2 - r1)
    p = erd / (erd + 1)

    loglterms = gw * ((s1 * T.log(1 - p)) + (s2 * T.log(p)))

    logl = -T.sum(loglterms)

    gradf = T.grad(logl, prank)
    hessf = theano.gradient.hessian(logl, prank)

    return s1, s2, t1, t2, gw, prank, loglterms, logl, gradf, hessf
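_getModel only returns symbolic expressions; a small sketch of how they might be compiled and evaluated (assumes theano and numpy are imported as in the surrounding module, and uses toy data):

import numpy as np

s1, s2, t1, t2, gw, prank, loglterms, logl, gradf, hessf = _getModel()

# compile the negative log-likelihood and its gradient w.r.t. the rank vector
f_logl = theano.function([s1, s2, t1, t2, gw, prank], logl)
f_grad = theano.function([s1, s2, t1, t2, gw, prank], gradf)

# toy data: 3 pairwise observations, 2-dimensional rank vector
s1v = np.array([1., 0., 1.])
s2v = np.array([0., 1., 0.])
gwv = np.ones(3)
t1v = np.random.randn(3, 2)
t2v = np.random.randn(3, 2)
pr = np.zeros(2)

print(f_logl(s1v, s2v, t1v, t2v, gwv, pr))
print(f_grad(s1v, s2v, t1v, t2v, gwv, pr))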
Example no. 31
def _getModel():
    s1, s2 = T.dvectors('s1', 's2')
    t1, t2 = T.dmatrices('t1', 't2')
    gw = T.dvector('gw')
    prank = T.dvector('prank')

    r1 = T.dot(t1, prank)
    r2 = T.dot(t2, prank)

    erd = T.exp(r2 - r1)
    p = erd / (erd + 1)

    loglterms = gw * ((s1 * T.log(1 - p)) + (s2 * T.log(p)))

    logl = -T.sum(loglterms)

    gradf = T.grad(logl, prank)
    hessf = theano.gradient.hessian(logl, prank)

    return s1, s2, t1, t2, gw, prank, loglterms, logl, gradf, hessf
Example no. 32
    def createObjectiveFunction(self):
        '''
        @description: initialize objective function and minimization function
        @X,y data matrix/vector
        @u random noise for simulator
        @v standard normal for reparametrization trick
        '''
        y = T.dmatrices("y")
        i = T.iscalar("i")
        v = T.dscalar("i")
        xStart = T.dvector("xStart")

        mu = self.params[0]
        #logSigma = sharedX(np.random.uniform(0, 1, (self.dimTheta, 1)), name='logSigma')
        logSigma = self.params[1]
        #logLambda = sharedX(np.random.uniform(0, 10), name='logLambda')
        logLambda = self.params[2]

        negKL = 0.5*self.dimTheta+0.5*T.sum(2*logSigma - mu ** 2 - T.exp(logSigma) ** 2)
        self.k = mu+T.exp(logSigma)*v
        V1 = T.dvector("V2")
        V2 = T.dvector("V2")
        results, updates = th.scan(fn=self.fisher_wright_normal_approx, outputs_info=[{'initial':xStart,'taps':[-1]}],sequences=[V1,V2], n_steps=i)
        f = results

        logLike = -self.m*(0.5 * np.log(2 * np.pi) + logLambda)-0.5*T.sum((y-f)**2)/(T.exp(logLambda)**2)
        part2 = f
        #0.5*T.sum((y-f)**2)
        #/(T.exp(logLambda)**2)
        elbo = (negKL + logLike)
        obj = -elbo
        test1 = y[0:self.i/4,:].sum(axis=0)/(self.i/4)
        test2 = y[self.i/4:self.i/2].sum(axis=0)/(self.i/4)
        self.test = th.function([xStart, i, y, v, V1, V2], [test1, test2], on_unused_input='ignore')
        self.part2 = th.function([xStart, i, y, v, V1, V2], part2, updates=updates, on_unused_input='ignore')
        self.logLike = th.function([xStart, i, y, v, V1, V2], logLike, updates=updates, on_unused_input='ignore')
        self.lowerboundfunction = th.function([xStart, i, y, v, V1, V2], obj, updates=updates, on_unused_input='ignore')
        derivatives = T.grad(obj, self.params)
        self.gradientfunction = th.function([xStart, i, y, v, V1, V2], derivatives, updates=updates, on_unused_input='ignore')
Example no. 33
def train(sample, expected, iteration, layers=None):
    '''
    sample and expected are matrices where each row is a single datum
    and the length of each row is the size of the datum
    '''
    vectorinlength = sample.shape[1]
    vectoroutlength = expected.shape[1]
    datasize = sample.shape[0]
    if layers is None:
        layer1 = layer(vectorinlength, 30)
        layer2 = layer(30, 10)
        layer3 = layer(10, vectoroutlength)
    else:
        layer1 = layers[0]
        layer2 = layers[1]
        layer3 = layers[2]
    x, y = T.dmatrices('x', 'y')
    firstoutput = T.tanh(T.dot(x, layer1.weights) + layer1.bias)
    secondoutput = T.tanh(T.dot(firstoutput, layer2.weights) + layer2.bias)
    finaloutput = T.tanh(T.dot(secondoutput, layer3.weights) + layer3.bias)
    err = ((finaloutput - y)**2).mean()
    f = theano.function(
        [x, y],
        err,
        updates=((layer1.weights,
                  layer1.weights - 0.05 * T.grad(err, layer1.weights)),
                 (layer1.bias, layer1.bias - 0.05 * T.grad(err, layer1.bias)),
                 (layer2.weights,
                  layer2.weights - 0.05 * T.grad(err, layer2.weights)),
                 (layer2.bias, layer2.bias - 0.05 * T.grad(err, layer2.bias)),
                 (layer3.weights, layer3.weights -
                  (0.05 / datasize / datasize) * T.grad(err, layer3.weights)),
                 (layer3.bias, layer3.bias - 0.05 * T.grad(err, layer3.bias))))
    totalerr = 0
    for i in range(iteration):
        totalerr = f(sample, expected)
        print(totalerr)
    return layer1, layer2, layer3
Example no. 34
def test():
    # multiple inputs, multiple outputs
    a, b = T.dmatrices('a', 'b')
    diff = a - b
    abs_diff = T.abs_(diff)
    sqr_diff = diff ** 2
    f = function([a, b], [diff, abs_diff, sqr_diff])
    h, i, j = f([[0, 1], [2, 3]], [[4, 5], [6, 7]])

    # default value for function arguments
    a, b = T.dscalars('a', 'b')
    z = a + b
    f = function([a, Param(b, default=1)], z)
    print f(1, b=2)
    print f(1)
    print f(1, 2)

    # shared variable
    state = shared(0)
    inc = T.lscalar('inc') # state is int64 by default
    accumulator = function([inc], state, updates=[(state, state + inc)])
    print accumulator(300)
    print state.get_value()
Example no. 35
def compute_tanh():
    rng = np.random

    # Define the input variables
    x, w = T.dmatrices('x', 'w')
    b = T.dvector('b')

    # Define the scan body that computes the output
    y, u = theano.scan(lambda i, w, b: T.tanh(T.dot(i, w) + b),
                       sequences=x,
                       non_sequences=[w, b])

    # Build the complete compiled function
    result = function([x, w, b], y)

    # Initialize the data
    x_t = rng.rand(4, 5)
    w_t = rng.rand(5, 4)
    b_t = rng.rand(4)

    # Feed in the data
    print x_t
    print w_t
    print result(x_t, w_t, b_t)
Example no. 36
 def test_duplicate_updates(self):
     x, y = dmatrices('x', 'y')
     z = shared(numpy.ones((2, 3)))
     self.assertRaises(ValueError,
                       theano.function, [x, y], [z],
                       updates=[(z, (z + x + y)), (z, (z - x))])
Example no. 37
# the agent can go forward or backward by one state with wrapping (so if you go back from the 1st state you go to the
# end).
states = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]
NUM_STATES = len(states)
NUM_ACTIONS = 2
FUTURE_REWARD_DISCOUNT = 0.5
LEARNING_RATE = 0.1


def hot_one_state(index):
    array = np.zeros(NUM_STATES)
    array[index] = 1.
    return array.reshape(array.shape[0], 1)  # Theano is sad if the shape looks like (10,) rather than (10,1)


state, targets = T.dmatrices('state', 'targets')
hidden_weights = theano.shared(value=np.zeros((NUM_ACTIONS, NUM_STATES)), name='hidden_weights')

output_fn = T.dot(hidden_weights, state)
output = theano.function([state], output_fn)

states_input = T.dmatrix('states_input')
loss_fn = T.mean((T.dot(hidden_weights, states_input) - targets) ** 2)

gradient = T.grad(cost=loss_fn, wrt=hidden_weights)

train_model = theano.function(
    inputs=[states_input, targets],
    outputs=loss_fn,
    updates=[[hidden_weights, hidden_weights - LEARNING_RATE * gradient]],
    allow_input_downcast=True
)
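A sketch of one illustrative training sweep under the setup above (the reward and target computation are assumptions: a standard Q-learning target built from the constants defined earlier, applied to both actions purely for illustration):

for state_idx in range(NUM_STATES):
    state_vec = hot_one_state(state_idx)          # shape (NUM_STATES, 1)
    q_values = output(state_vec)                  # shape (NUM_ACTIONS, 1)

    # hypothetical target: observed reward plus discounted best future value
    reward = states[state_idx]
    target = reward + FUTURE_REWARD_DISCOUNT * q_values.max()
    targets_vec = np.full((NUM_ACTIONS, 1), target)

    loss = train_model(state_vec, targets_vec)
    print(loss)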
Example no. 38
from theano import tensor as T
import theano, time, numpy

rows, cols = 2, 2

param, constant = T.dmatrices('param', 'constant')

p1, p2, p3, p4 = T.dmatrices('p1', 'p2', 'p3', 'p4')
x, y, z, w, u = T.dmatrices('x', 'y', 'z', 'w', 'u')

ones = numpy.ones((rows, cols))
zeros = numpy.zeros((rows, cols))


# Second Group of rules - Decision Tree rules , apply to all nodes
def rule(p1, *therest):
    param = eval('p1 + 1/2')
    param = T.switch(eval('T.gt(param,1)'), ones, zeros)
    return param


#param = eval('p1+p2/p3')

list = [p1]
f_switch = theano.function(list,
                           eval('rule(p1)'),
                           mode=theano.Mode(linker='vm'))

m1 = numpy.random.rand(rows, cols)
m2 = numpy.random.rand(rows, cols)
m3 = numpy.random.rand(rows, cols)
Example no. 39
vec = T.dvector()
scal = T.dscalar()
sv_add = vec + scal

f_add = function([vec, scal], sv_add)

print f_add([1, 2, 3, 4], 2)

x = T.dmatrix()
y = T.dmatrix()
z = x + y

f = function([x, y], z)
print f([[1, 1], [2, 2]], [[3, 3], [4, 4]])

x, y = T.dmatrices('x', 'y')
s = 1 / (1 + T.exp(-x))

logistic = function([x], s)
print logistic([[0, 1], [-1, -2]])

a, b = T.dmatrices('a', 'b')
diff = a - b
abs_diff = abs(diff)
diff_squared = diff**2

f = function([a, b], [diff, abs_diff, diff_squared])
print f([[1, 1], [1, 1]], [[0, 1], [2, 3]])

from theano import shared
Example no. 40
    def __init__(self, params,correct, samples = 20,batch_size=None):
        ker = kernel()
        self.samples = samples
        self.params =  params
        self.batch_size=batch_size
        
        # File used to save the trained model
        model_file_name = 'model2' + '.save'
        # Load a previously built model if one exists
        try:
            print ('Trying to load model...')
            with open(model_file_name, 'rb') as file_handle:
                obj = pickle.load(file_handle)
                self.f, self.g,self.ES_US= obj
                print ('Loaded!')
            return
        except:
            print ('Failed. Creating a new model...')
        
        X,Y,X_test,m,S_b,Z,eps_NQ,eps_M=\
        T.dmatrices('X','Y','X_test','m','S_b','Z','eps_NQ','eps_M')

        mu,Sigma=T.dmatrices('mu','Sigma')

        lhyp = T.dvector('lhyp')
        ls=T.dvector('ls')
        
        N,Q= m.shape
        M=Z.shape[0]
        D=X.shape[1]
        
        # Constrain these variables to be positive
        beta = T.exp(ls[0])
        #beta=T.exp(lhyp[0])
        sf2, l = T.exp(lhyp[0]), T.exp(lhyp[1:1+Q])
        
        S=T.exp(S_b)

        
        Xtilda = m + S * eps_NQ

        print ('Setting up cache...')
        
        Kmm = ker.RBF(sf2, l, Z)
        KmmInv = sT.matrix_inverse(Kmm) 
        #KmmDet=theano.sandbox.linalg.det(Kmm)
        
        from theano.tensor.shared_randomstreams import RandomStreams
        srng = RandomStreams(seed=234)
        rv_u = srng.normal((2,N,Q))
        rv_s = srng.normal((2,N,Q)) # separate random draws are needed for the mean and the variance
        
        xx_s=m.reshape([1,N,Q])+S.reshape([1,N,Q])*rv_s
        xxx_s=xx_s.reshape([2,N,1,Q])
        zz=Z.reshape([1,1,M,Q])
        rbf_u=T.exp(-T.sum(((xxx_s-zz)**2)/(2*l.reshape([1,1,1,Q])),-1))*sf2#N×M
        A=Kmm+beta*T.sum(T.mean(rbf_u.reshape([2,M,1,N])*rbf_u.reshape([2,1,M,N]),0),-1)
        Ainv=sT.matrix_inverse(A)
        Sigma_f=T.dot(Kmm,T.dot(Ainv,Kmm))
                     
        xx=m.reshape([1,N,Q])+S.reshape([1,N,Q])*rv_u
        xxx=xx.reshape([2,N,1,Q])
        rbf=T.mean(T.exp(-T.sum(((xxx-zz)**2)/(2*l.reshape([1,1,1,Q])),-1)),0)#N×M
        RHS=T.sum(rbf.reshape([M,1,N])*X.reshape([1,D,N]),2)

        mu_f=beta*T.dot(Kmm,T.dot(Ainv,RHS)) 
        
        self.ES_US = theano.function([m,S_b,Z,X,lhyp,ls], [mu_f,Sigma_f],on_unused_input='ignore')
        
        rv_u_d = srng.normal((N,Q))
        rv_s_d = srng.normal((N,Q)) # separate random draws are needed for the mean and the variance
        Xtilda_u = m + S * rv_u_d
        Xtilda_s = m + S * rv_s_d
        Kmn_u = ker.RBF(sf2, l, Z, Xtilda_u)
        Kmn_s = ker.RBF(sf2, l, Z, Xtilda_s)
        
        
        print ('Modeling...')
        
        Kmn = ker.RBF(sf2,l,Z,Xtilda)
        Knn = ker.RBF(sf2,l,Xtilda,Xtilda)
        
        Ktilda=Knn-T.dot(Kmn.T,T.dot(KmmInv,Kmn))
        
        Kinterval=T.dot(KmmInv,Kmn)
        

        # Scale transformation
        Sigma_L=sT.cholesky(Sigma)
        U = mu+Sigma_L.dot(eps_M)
        
        mean_U=T.dot(Kinterval.T,U)
        Covariance = beta       
        
        LL = (self.log_mvn(X, mean_U, Covariance) - 0.5*beta*T.sum((T.eye(N)*Ktilda)))*correct      
        KL_X = -self.KLD_X(m,S)*correct
        KL_U = -self.KLD_U(mu, Sigma_L, Kmm,KmmInv)
        
        print ('Compiling model ...')        


        inputs = {'X': X, 'Z': Z, 'm': m, 'S_b': S_b, 'mu': mu, 'Sigma': Sigma, 'lhyp': lhyp, 'ls': ls, 
            'eps_M': eps_M, 'eps_NQ': eps_NQ}
        
        z = 0.0*sum([T.sum(v) for v in inputs.values()]) # solve a bug with derivative wrt inputs not in the graph
        
        self.f = {n: theano.function(list(inputs.values()), f+z, name=n, on_unused_input='ignore')\
                  for n,f in zip(['X', 'U', 'LL', 'KL_U', 'KL_X'], [X, U, LL, KL_U, KL_X])}
        
        
        wrt = {'Z': Z, 'm': m, 'S_b': S_b, 'lhyp': lhyp, 'ls': ls}
        self.g = {vn: {gn: theano.function(list(inputs.values()), T.grad(gv+z, vv), name='d'+gn+'_d'+vn, 
            on_unused_input='ignore') for gn,gv in zip(['LL', 'KL_U', 'KL_X'], [LL, KL_U, KL_X])} for vn, vv in wrt.items()}

        with open(model_file_name, 'wb') as file_handle:
            print ('Saving model...')
            sys.setrecursionlimit(2000)
            pickle.dump([self.f, self.g,self.ES_US], file_handle, protocol=pickle.HIGHEST_PROTOCOL)
Example no. 41
    def __init__(self, layer_sizes, epochs, batch_size, learn_rate, init_seed=None, verbose=False, cost='MSE',
                 regularizer=None, l=1, report_every=1):
        """
        A multi-layer perceptron implemented using Theano
        :param layer_sizes: sizes of the layers, including input and output
        :param epochs: number of epochs to run
        :param batch_size: number of samples in each minibatch.
        :param learn_rate: hyperparameter controlling learning velocity
        :param init_seed: used to seed numpy RNG
        :param verbose: 0 for nothing, 1 for cost updates
        :param cost: 'MSE'(mean squared error), 'cross entropy'
        :param regularizer: 'weight decay' or None
        :param l: coefficient of regularizer
        """

        assert len(layer_sizes) > 1
        self.init_seed = init_seed
        self.layer_sizes = layer_sizes
        self.epochs = epochs
        self.batch_size = batch_size
        self.learn_rate = learn_rate
        self.verbose = verbose
        self.cost = cost
        self.regularizer = regularizer
        self.l = l
        self.cost_value = None
        self.report_every = report_every

        n_layers = len(layer_sizes)
        trainxvar, trainyvar = T.dmatrices('xt', 'yt')
        x, y = T.dmatrices('x', 'y')

        # initializing the weights and biases randomly
        weights = []
        biases = []
        np.random.seed(init_seed)
        for i in range(n_layers-1):
            weights.append(shared(np.random.randn(layer_sizes[i], layer_sizes[i+1]), name='w{}'.format(i)))
            biases.append(shared(np.random.randn(layer_sizes[i+1]), name='b{}'.format(i)))

        # forward propagation
        a = []
        for i in range(n_layers-1):
            if i == 0:
                a.append(
                    1 / (1 + T.exp(-(T.dot(x, weights[i]) + biases[i])))
                )
            else:
                a.append(
                    1 / (1 + T.exp(-(T.dot(a[i-1], weights[i]) + biases[i])))
                )
        self.a = a
        self.w = weights

        self._feed_forward = function([x], a[-1])
        self._predict_best = function([x], a[-1].argmax(axis=1))
        self._predict_activation = function([x], a[-1].round())

        # creating cost function
        if cost == 'MSE':
            err = (y - a[-1]) ** 2 / 2
        elif cost == 'cross entropy':
            err = -y * T.log(a[-1]) - (1 - y)*T.log(1 - a[-1])
        else:
            raise ValueError("Unknown cost function, {}".format(cost))

        # adding regularization function to it
        if regularizer is None:
            cost_f = err.mean()
        elif regularizer == 'weight decay':
            cost_f = err.mean() + l / (2 * x.shape[0]) * T.sum([(_w ** 2).sum() for _w in weights])
        else:
            raise ValueError("Unknown regularization method, {}".format(regularizer))

        self._get_cost = function([x, y], cost_f)

        # creating training function
        dweights = T.grad(cost_f, weights)
        dbiases = T.grad(cost_f, biases)
        idx = T.lscalar()
        self._train = function(inputs=[idx, trainxvar, trainyvar],
                               outputs=[cost_f],
                               updates=[(_w, _w - learn_rate * _gw) for _w, _gw in zip(weights, dweights)] +
                                       [(_b, _b - learn_rate * _gb) for _b, _gb in zip(biases, dbiases)],
                               givens=[
                                   (x, trainxvar[batch_size*idx: batch_size*(idx+1)]),
                                   (y, trainyvar[batch_size*idx: batch_size*(idx+1)])
                               ])
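A hypothetical training loop for the class above (the class name MLP is an assumption; only __init__ appears in the snippet, so the driver below simply reuses the compiled _train, _get_cost and _predict_best functions it creates):

import numpy as np

# toy data: 200 samples with 4 features, one-hot targets over 3 classes
X = np.random.randn(200, 4)
Y = np.eye(3)[np.random.randint(0, 3, 200)]

net = MLP(layer_sizes=[4, 8, 3], epochs=50, batch_size=20, learn_rate=0.5)
n_batches = X.shape[0] // net.batch_size

for epoch in range(net.epochs):
    for idx in range(n_batches):
        cost, = net._train(idx, X, Y)             # one minibatch SGD step
    if epoch % net.report_every == 0:
        print(net._get_cost(X, Y))

print(net._predict_best(X)[:10])                  # predicted class indices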
Example no. 42
from numpy import empty, inf, zeros, array, abs, count_nonzero
from matplotlib.pyplot import ion, draw, plot, savefig
from cv2 import imwrite, waitKey
from LogisticRegression import LogisticRegression as LogReg
from theano import function, pp, config as cfg
from time import sleep
# cfg.openmp = True
import theano.tensor as T
from dataset import loadData, OneToMany
from visual import visualize

Tr, Ts, _ = loadData('mnist.pkl.gz', True)
m_sample = Tr[0].shape[0]
m_test_sample = Ts[1].shape[0]

x, y = T.dmatrices('x', 'y')
L = LogReg(x, 784, 10)
lam = 0.04

p = L.predict()
l = L.cost(y) + L.regularizer(lam)
gw = T.grad(l, wrt=L.W)
gb = T.grad(l, wrt=L.B)
alpha = 0.05

W_shape = L.weightShapes()[0]
B_shape = L.weightShapes()[1]
VW = zeros(W_shape)
VB = zeros(B_shape)

train = function([x, y], [l, gw, gb])
Example no. 43
import numpy as np
import theano.tensor as T
import theano
import os
from PIL import Image
from PIL import ImageDraw

y1, y2 = T.dmatrices('y1', 'y2')
loss = 0.0

scale_vector = []
scale_vector.extend([2] * 4)
scale_vector.extend([1] * 20)
scale_vector = np.reshape(np.asarray(scale_vector), (1, len(scale_vector)))

for i in range(2):
    y1_piece = y1[:, i * 25:i * 25 + 24]
    y2_piece = y2[:, i * 25:i * 25 + 24]

    y1_piece = y1_piece * scale_vector
    y2_piece = y2_piece * scale_vector

    loss_piece = T.sum(T.square(y1_piece - y2_piece), axis=1)
    loss = loss + loss_piece * y2[:, i * 25 + 24]

    closs = T.square(y2[:, i * 25 + 24] - y1[:, i * 25 + 24])
    cmask = (1 - y2[:, i * 25 + 24]) * 0.5 + y2[:, i * 25 + 24]
    closs = closs * cmask
    loss = loss + closs

loss = T.sum(loss)
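The snippet only builds the symbolic loss; a minimal sketch (using the imports already at the top of the example) of compiling and evaluating it on dummy predictions and labels, where each row holds two blocks of 25 values (24 scaled regression terms plus one confidence):

loss_fn = theano.function([y1, y2], loss)

y_pred = np.random.rand(8, 50)   # dummy batch of 8 predictions
y_true = np.random.rand(8, 50)   # dummy batch of 8 labels
print(loss_fn(y_pred, y_true))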
Example no. 44
import theano
import theano.tensor as T
a, b = T.dmatrices('a', 'b')
diff = a - b
abs_diff = abs(diff)
diff_squared = diff**2
f = theano.function([a, b], [diff, abs_diff, diff_squared])
print f([[1, 2], [-1, 2]], [[0, 1], [3, 4]])
Example no. 45
import theano.tensor as T

a,b = T.dmatrices('a','b')
x,y = T.dmatrices('x','y')

is_train=1

#1=training,2=test
z= T.switch(T.neq(is_train, 0), 1, 2)

print z.eval()

Example no. 46
import theano
import theano.tensor as tensor
from pprint import pprint

matrix = tensor.dmatrix('matrix')

logistic_expression = 1 / (1 + tensor.exp(-matrix))
logistic_function = theano.function([matrix], logistic_expression)

pprint(
    logistic_function([[0, 1],
                       [-1, -2]]))



a, b = tensor.dmatrices('a', 'b')
diff = a - b
abs_diff = abs(diff)
diff_squared = diff**2
f = theano.function([a, b], [diff, abs_diff, diff_squared])
pprint(f([[1, 1], [1, 1]], [[0, 1], [2, 3]]))


Example no. 47
from theano import function
import theano.tensor as T
import cv2 as cv
import copy, numpy as np
import math as mt
import logging


def log(percent, flag=1):
    logging.basicConfig(filename='percentage.log', level=logging.DEBUG)
    if flag == 1:
        logging.info(percent)
    if flag == 2:
        logging.debug(percent)


X, B, W, Y = T.dmatrices('X', 'B', 'W', 'Y')
Y_act = T.dvector('Y_act')
Y_cal = T.dvector('Y_cal')
Error = T.scalar(dtype=X.dtype)
P = T.scalar(dtype=X.dtype)

output = (1 / (1 + T.exp(-X)))
sigmoid = function([X], output)

output1 = X  #T.tanh(X)
norm = function([X], output1)

# outputs class for input sample
import theano
import numpy as np
from theano import function
from theano import tensor as T
from theano.tensor.shared_randomstreams import RandomStreams as RS

X, Y = T.dmatrices(2)
B = T.dvector()
components, updates = theano.scan(lambda x, y, b: T.tanh(T.dot(x, y) + b),
                                  sequences=X,
                                  non_sequences=[Y, B])

ele_comp = function([X, Y, B], components)

dim = 10
X_realization = np.ones((dim, dim), dtype='float64')
Y_realization = np.ones((dim, dim), dtype='float64')
prng = RS(seed=9000)
B_real = prng.normal((dim, ), avg=0, std=2, dtype='float64')
B_realization = function([], B_real)
print ele_comp(X_realization, Y_realization, B_realization())
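
The scan above applies tanh(dot(x, Y) + B) to X row by row; the same result can be obtained without scan. A minimal check, reusing the variables defined above (the drawn value of B is stored so both functions see the same realization):

b_val = B_realization()
direct = T.tanh(T.dot(X, Y) + B)
direct_fn = function([X, Y, B], direct)
print(np.allclose(ele_comp(X_realization, Y_realization, b_val),
                  direct_fn(X_realization, Y_realization, b_val)))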

###################################################################################################################
###################################Evaluating a polynomial#########################################################
###################################################################################################################
co_eff = T.dvector()
free_var = T.dscalar()
max_coeff = T.iscalar()

components, updates = theano.scan(lambda ce, power, fv: ce * (fv**power),
                                  sequences=[co_eff, T.arange(max_coeff)],
                                  non_sequences=free_var)
# sum the per-power terms to obtain the value of the polynomial (standard Theano scan pattern)
polynomial = components.sum()
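
A short usage check for the polynomial graph above; the coefficient list and evaluation point are illustrative:

eval_poly = function([co_eff, max_coeff, free_var], polynomial)
print(eval_poly([1., 0., 2.], 3, 3.))   # 1 + 0*x + 2*x**2 at x = 3 -> 19.0
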
Esempio n. 49
0
    def compile_theano():
        """
        This function generates theano compiled kernels for energy and force learning.

        The positions of the atoms relative to the central one, and their chemical species,
        are defined by a matrix of dimension Mx5.

        Returns:
            k3_ee (func): energy-energy kernel
            k3_ef (func): energy-force kernel
            k3_ff (func): force-force kernel
        """
        if not (os.path.exists(Mffpath / 'k3_ee_s.pickle')
                and os.path.exists(Mffpath / 'k3_ef_s.pickle')
                and os.path.exists(Mffpath / 'k3_ff_s.pickle')):
            print("Building Kernels")

            import theano.tensor as T
            from theano import function, scan
            logger.info("Started compilation of theano three body kernels")

            # --------------------------------------------------
            # INITIAL DEFINITIONS
            # --------------------------------------------------

            # positions of central atoms
            r1, r2 = T.dvectors('r1d', 'r2d')
            # positions of neighbours
            rho1, rho2 = T.dmatrices('rho1', 'rho2')
            # hyperparameter
            sig = T.dscalar('sig')
            # cutoff hyperparameters
            theta = T.dscalar('theta')
            rc = T.dscalar('rc')

            # positions of neighbours without chemical species

            rho1s = rho1[:, 0:3]
            rho2s = rho2[:, 0:3]

            # --------------------------------------------------
            # RELATIVE DISTANCES TO CENTRAL VECTOR AND BETWEEN NEIGHBOURS
            # --------------------------------------------------

            # first and second configuration
            r1j = T.sqrt(T.sum((rho1s[:, :] - r1[None, :])**2, axis=1))
            r2m = T.sqrt(T.sum((rho2s[:, :] - r2[None, :])**2, axis=1))
            rjk = T.sqrt(
                T.sum((rho1s[None, :, :] - rho1s[:, None, :])**2, axis=2))
            rmn = T.sqrt(
                T.sum((rho2s[None, :, :] - rho2s[:, None, :])**2, axis=2))

            # --------------------------------------------------
            # BUILD THE KERNEL
            # --------------------------------------------------

            # Squared exp of differences
            se_1j2m = T.exp(-(r1j[:, None] - r2m[None, :])**2 / (2 * sig**2))
            se_jkmn = T.exp(
                -(rjk[:, :, None, None] - rmn[None, None, :, :])**2 /
                (2 * sig**2))
            se_jk2m = T.exp(-(rjk[:, :, None] - r2m[None, None, :])**2 /
                            (2 * sig**2))
            se_1jmn = T.exp(-(r1j[:, None, None] - rmn[None, :, :])**2 /
                            (2 * sig**2))

            # Kernel not summed (cyclic permutations)
            k1n = (se_1j2m[:, None, :, None] * se_1j2m[None, :, None, :] *
                   se_jkmn)
            k2n = (se_1jmn[:, None, :, :] * se_jk2m[:, :, None, :] *
                   se_1j2m[None, :, :, None])
            k3n = (se_1j2m[:, None, None, :] * se_jk2m[:, :, :, None] *
                   se_1jmn[None, :, :, :])

            # final shape is M1 M1 M2 M2
            ker = k1n + k2n + k3n

            cut_j = 0.5 * (1 + T.cos(np.pi * r1j / rc)) * (
                (T.sgn(rc - r1j) + 1) / 2)
            cut_m = 0.5 * (1 + T.cos(np.pi * r2m / rc)) * (
                (T.sgn(rc - r2m) + 1) / 2)

            cut_jk = cut_j[:, None] * cut_j[None, :] * 0.5 * (
                1 + T.cos(np.pi * rjk / rc)) * ((T.sgn(rc - rjk) + 1) / 2)
            cut_mn = cut_m[:, None] * cut_m[None, :] * 0.5 * (
                1 + T.cos(np.pi * rmn / rc)) * ((T.sgn(rc - rmn) + 1) / 2)

            # --------------------------------------------------
            # REMOVE DIAGONAL ELEMENTS AND ADD CUTOFF
            # --------------------------------------------------

            # remove diagonal elements AND lower triangular ones from first configuration
            mask_jk = T.triu(T.ones_like(rjk)) - T.identity_like(rjk)

            # remove diagonal elements from second configuration
            mask_mn = T.ones_like(rmn) - T.identity_like(rmn)

            # Combine masks
            mask_jkmn = mask_jk[:, :, None, None] * mask_mn[None, None, :, :]

            # Apply mask and then apply cutoff functions
            ker = ker * mask_jkmn
            ker = T.sum(ker * cut_jk[:, :, None, None] *
                        cut_mn[None, None, :, :])

            # --------------------------------------------------
            # FINAL FUNCTIONS
            # --------------------------------------------------

            # global energy energy kernel
            k_ee_fun = function([r1, r2, rho1, rho2, sig, theta, rc],
                                ker,
                                on_unused_input='ignore')

            # global energy force kernel
            k_ef = T.grad(ker, r2)
            k_ef_fun = function([r1, r2, rho1, rho2, sig, theta, rc],
                                k_ef,
                                on_unused_input='ignore')

            # local force force kernel
            k_ff = T.grad(ker, r1)
            k_ff_der, updates = scan(lambda j, k_ff, r2: T.grad(k_ff[j], r2),
                                     sequences=T.arange(k_ff.shape[0]),
                                     non_sequences=[k_ff, r2])
            k_ff_fun = function([r1, r2, rho1, rho2, sig, theta, rc],
                                k_ff_der,
                                on_unused_input='ignore')

            # Save the function that we want to use for multiprocessing
            # This is necessary because theano is a crybaby and does not want to access the
            # automatically stored compiled object from different processes
            with open(Mffpath / 'k3_ee_s.pickle', 'wb') as f:
                pickle.dump(k_ee_fun, f)
            with open(Mffpath / 'k3_ef_s.pickle', 'wb') as f:
                pickle.dump(k_ef_fun, f)
            with open(Mffpath / 'k3_ff_s.pickle', 'wb') as f:
                pickle.dump(k_ff_fun, f)

        else:
            print("Loading Kernels")
            with open(Mffpath / "k3_ee_s.pickle", 'rb') as f:
                k_ee_fun = pickle.load(f)
            with open(Mffpath / "k3_ef_s.pickle", 'rb') as f:
                k_ef_fun = pickle.load(f)
            with open(Mffpath / "k3_ff_s.pickle", 'rb') as f:
                k_ff_fun = pickle.load(f)

        # WRAPPERS (we don't want to plug the position of the central element every time)
        def k3_ee(conf1, conf2, sig, theta, rc):
            """
            Three body kernel for global energy-energy correlation

            Args:
                conf1 (array): first configuration.
                conf2 (array): second configuration.
                sig (float): lengthscale hyperparameter theta[0]
                theta (float): cutoff decay rate hyperparameter theta[1]
                rc (float): cutoff distance hyperparameter theta[2]

            Returns:
                kernel (float): scalar valued energy-energy 3-body kernel

            """
            return k_ee_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta,
                            rc)

        def k3_ef(conf1, conf2, sig, theta, rc):
            """
            Three body kernel for global energy-force correlation

            Args:
                conf1 (array): first configuration.
                conf2 (array): second configuration.
                sig (float): lengthscale hyperparameter theta[0]
                theta (float): cutoff decay rate hyperparameter theta[1]
                rc (float): cutoff distance hyperparameter theta[2]

            Returns:
                kernel (array): 3x1 energy-force 3-body kernel

            """
            return -k_ef_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig,
                             theta, rc)

        def k3_ff(conf1, conf2, sig, theta, rc):
            """
            Three body kernel for local force-force correlation

            Args:
                conf1 (array): first configuration.
                conf2 (array): second configuration.
                sig (float): lengthscale hyperparameter theta[0]
                theta (float): cutoff decay rate hyperparameter theta[1]
                rc (float): cutoff distance hyperparameter theta[2]

            Returns:
                kernel (matrix): 3x3 force-force 3-body kernel

            """
            return k_ff_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta,
                            rc)

        logger.info("Ended compilation of theano three body kernels")

        return k3_ee, k3_ef, k3_ff
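
A hedged usage sketch for the kernels returned above; calling compile_theano directly, the Mx5 toy configurations and the hyperparameter values are all illustrative assumptions (the surrounding module is expected to provide Mffpath, logger, pickle, os and np).

import numpy as np

k3_ee, k3_ef, k3_ff = compile_theano()

# toy configurations: rows = neighbours, columns = (x, y, z, species of centre, species of neighbour)
conf1 = np.random.rand(4, 5)
conf2 = np.random.rand(3, 5)
sig, theta, rc = 0.5, 1.0, 3.0                    # illustrative hyperparameter values

print(k3_ee(conf1, conf2, sig, theta, rc))        # scalar energy-energy kernel
print(k3_ef(conf1, conf2, sig, theta, rc).shape)  # (3,) energy-force kernel
print(k3_ff(conf1, conf2, sig, theta, rc).shape)  # (3, 3) force-force kernel
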
Esempio n. 50
0
 def test_duplicate_updates(self):
     x, y = dmatrices("x", "y")
     z = shared(np.ones((2, 3)))
     with pytest.raises(ValueError):
         theano.function([x, y], [z],
                         updates=[(z, (z + x + y)), (z, (z - x))])
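
The test above documents that Theano rejects two update rules for the same shared variable. A minimal sketch of the usual workaround, merging the conflicting rules into a single expression per shared variable (variable names follow the test):

import numpy as np
import theano
import theano.tensor as T
from theano import shared

x, y = T.dmatrices('x', 'y')
z = shared(np.ones((2, 3)))

new_z = (z + x + y) - x                           # net effect of the two rules, combined
step = theano.function([x, y], new_z, updates=[(z, new_z)])
print(step(np.zeros((2, 3)), np.ones((2, 3))))    # z is now its old value + y
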
    def __init__(self, params, correct, samples=20, batch_size=None):
        ker = kernel()
        self.samples = samples
        self.params = params
        self.batch_size = batch_size

        # file where the model is saved
        model_file_name = 'model2' + '.save'
        # load a previously built model if one exists
        try:
            print('Trying to load model...')
            with open(model_file_name, 'rb') as file_handle:
                obj = pickle.load(file_handle)
                self.f, self.g = obj
                print('Loaded!')
            return
        except:
            print('Failed. Creating a new model...')

        X,Y,X_test,m,S_b,mu,Sigma_b,Z,eps_NQ,eps_M =\
        T.dmatrices('X','Y','X_test','m','S_b','mu','Sigma_b','Z','eps_NQ','eps_M')

        lhyp = T.dvector('lhyp')
        ls = T.dvector('ls')

        (M, D), N, Q = Z.shape, X.shape[0], X.shape[1]

        # constrain the variables to positive values
        beta = T.exp(ls[0])
        #beta=T.exp(lhyp[0])
        sf2, l = T.exp(lhyp[0]), T.exp(lhyp[1:1 + Q])

        S = T.exp(S_b)
        #Sigma=T.exp(self.Sigma_b)

        # for x no square root is needed, since its covariance is diagonal
        # u is not diagonal, so a triangular matrix has to be built, e.g. via a Cholesky decomposition
        Sigma = T.tril(Sigma_b - T.diag(T.diag(Sigma_b)) +
                       T.diag(T.exp(T.diag(Sigma_b))))

        # rescaling
        mu_scaled, Sigma_scaled = sf2**0.5 * mu, sf2**0.5 * Sigma

        Xtilda = m + S * eps_NQ
        U = mu_scaled + Sigma_scaled.dot(eps_M)

        print('Setting up cache...')

        Kmm = ker.RBF(sf2, l, Z)
        KmmInv = sT.matrix_inverse(Kmm)
        #KmmDet=theano.sandbox.linalg.det(Kmm)

        #KmmInv_cache = sT.matrix_inverse(Kmm)
        #self.fKmm = theano.function([Z, lhyp], Kmm, name='Kmm')
        #self.f_KmmInv = theano.function([Z, lhyp], KmmInv_cache, name='KmmInv_cache')
        # note: this compiles the function KmmInv_cache with Z and lhyp as arguments,
        # i.e. the matrix inverse becomes a function of Z and the hyperparameters
        #self.update_KmmInv_cache()  # actually computes KmmInv by plugging in numerical values
        # builds the derivative functions of the inverse matrix

        #self.dKmm_d = {'Z': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), Z), name='dKmm_dZ'),
        #               'lhyp': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), lhyp), name='dKmm_dlhyp')}

        print('Modeling...')

        Kmn = ker.RBF(sf2, l, Z, Xtilda)
        Knn = ker.RBF(sf2, l, Xtilda, Xtilda)

        Ktilda = Knn - T.dot(Kmn.T, T.dot(KmmInv, Kmn))

        Kinterval = T.dot(KmmInv, Kmn)

        mean_U = T.dot(Kinterval.T, U)
        Covariance = beta

        LL = (self.log_mvn(X, mean_U, Covariance) - 0.5 * beta * T.sum(
            (T.eye(N) * Ktilda))) * correct
        KL_X = -self.KLD_X(m, S) * correct
        KL_U = -self.KLD_U(mu_scaled, Sigma_scaled, Kmm, KmmInv)

        print('Compiling model ...')

        inputs = {
            'X': X,
            'Z': Z,
            'm': m,
            'S_b': S_b,
            'mu': mu,
            'Sigma_b': Sigma_b,
            'lhyp': lhyp,
            'ls': ls,
            'eps_M': eps_M,
            'eps_NQ': eps_NQ
        }

        z = 0.0 * sum([
            T.sum(v) for v in inputs.values()
        ])  # solve a bug with derivative wrt inputs not in the graph

        self.f = {n: theano.function(list(inputs.values()), f+z, name=n, on_unused_input='ignore')\
                  for n,f in zip(['X', 'U', 'LL', 'KL_U', 'KL_X'], [X, U, LL, KL_U, KL_X])}

        wrt = {
            'Z': Z,
            'm': m,
            'S_b': S_b,
            'mu': mu,
            'Sigma_b': Sigma_b,
            'lhyp': lhyp,
            'ls': ls
        }
        self.g = {
            vn: {
                gn: theano.function(list(inputs.values()),
                                    T.grad(gv + z, vv),
                                    name='d' + gn + '_d' + vn,
                                    on_unused_input='ignore')
                for gn, gv in zip(['LL', 'KL_U', 'KL_X'], [LL, KL_U, KL_X])
            }
            for vn, vv in wrt.items()
        }

        with open(model_file_name, 'wb') as file_handle:
            print('Saving model...')
            sys.setrecursionlimit(2000)
            pickle.dump([self.f, self.g],
                        file_handle,
                        protocol=pickle.HIGHEST_PROTOCOL)
Esempio n. 52
0
f = theano.function([a,b],out)
print(f([0,1,2],[1,2,3]))

## Logistic
x = T.dmatrix("x")
s = 1 / (1 + T.exp(-x))
logistic = theano.function([x], s)
logistic([[0,1], [-1,-2]]) # elementwise because constituent operations are elementwise

## Equivalently, can use tanh function
s2 = (1 + T.tanh(x / 2)) / 2
logistic2 = function([x], s2)
logistic2([[0,1], [-1,-2]]) # elementwise because constituent operations are elementwise

## Function can also have multiple outputs
a, b = T.dmatrices('a', 'b')
c, d = T.dmatrices('c', 'd')
diff = a - b
abs_diff = abs(diff)
diff_squared = diff ** 2
f = theano.function([a, b], [diff, abs_diff, diff_squared])
f([[1,1], [1,1]], [[0,1],[2,3]])

## Default value for argument
x, y = T.dscalars('x', 'y')
z = x + y
f = theano.function([x, theano.Param(y, default = 1)], z)
f(33)
f(33,2)

from theano import function
Esempio n. 53
0
File: 5.py  Project: Toodou94/Gittut
"""
Created on Tue Jul  4 13:55:41 2017

@author: zhangli
"""

import numpy as np
import theano.tensor as T
import theano

# activation function example
x = T.dmatrix('x')
s = 1 / (1 + T.exp(-x))  # logistic or soft step, one kind of activation function
logistic = theano.function([x], s)
print(logistic([[0, 1], [2, 3]]))

# multiple outputs for a function
a, b = T.dmatrices('a', 'b')  # define two variables of the same type with dmatrices
diff = a - b
abs_diff = abs(diff)
diff_squared = diff**2
f = theano.function([a, b], [diff, abs_diff, diff_squared])
print(f(np.ones((2, 2)), np.arange(4).reshape((2, 2))))

# default values and names for function inputs
x, y, w = T.dscalars('x', 'y', 'w')
z = (x + y) * w
f = theano.function(
    [x, theano.In(y, value=1),
     theano.In(w, value=2, name='weights')], z)
print(f(23, 2, weights=4))
S = 1 / (1 + T.exp(-X))

# Define and test function logistics
logistics = function([X], S)
print "===== Function logistics tests ====="
input = [[1, 2], [3, 4]]
print "input: "
print input
print "Function logistics output: "
print logistics(input)


# Computing More than one Thing at the Same Time
print ""
print "===== Define matrices a,b and function diffs with three outputs ====="
a, b = T.dmatrices('a', 'b')
diff = a - b
absdiff = abs(diff)
squareddiff = diff ** 2
# Define and test function diffs
diffs = function([a, b], [diff, absdiff, squareddiff])
print "===== Function diffs tests ====="
input1 = [[1, 1], [1, 1]]
print "input1: "
print input1
input2 = [[0, 1], [2, 3]]
print "input2: "
print input2
print "Function diffs output: "
print diffs(input1, input2)
Esempio n. 55
0
    def compile_theano():
        """
        This function generates theano compiled kernels for energy and force learning.

        The positions of the atoms relative to the central one, and their chemical species,
        are defined by a matrix of dimension Mx5.

        Returns:
            km_ee (func): energy-energy kernel
            km_ef (func): energy-force kernel
            km_ff (func): force-force kernel
        """

        if not (os.path.exists(Mffpath / 'k3_ee_m.pickle')
                and os.path.exists(Mffpath / 'k3_ef_m.pickle')
                and os.path.exists(Mffpath / 'k3_ff_m.pickle')):
            print("Building Kernels")

            import theano.tensor as T
            from theano import function, scan

            logger.info("Started compilation of theano three body kernels")

            # --------------------------------------------------
            # INITIAL DEFINITIONS
            # --------------------------------------------------

            # positions of central atoms
            r1, r2 = T.dvectors('r1d', 'r2d')
            # positions of neighbours
            rho1, rho2 = T.dmatrices('rho1', 'rho2')
            # hyperparameter
            sig = T.dscalar('sig')
            # cutoff hyperparameters
            theta = T.dscalar('theta')
            rc = T.dscalar('rc')

            # positions of neighbours without chemical species

            rho1s = rho1[:, 0:3]
            rho2s = rho2[:, 0:3]

            alpha_1 = rho1[:, 3].flatten()
            alpha_2 = rho2[:, 3].flatten()

            alpha_j = rho1[:, 4].flatten()
            alpha_m = rho2[:, 4].flatten()

            alpha_k = rho1[:, 4].flatten()
            alpha_n = rho2[:, 4].flatten()

            # --------------------------------------------------
            # RELATIVE DISTANCES TO CENTRAL VECTOR AND BETWEEN NEIGHBOURS
            # --------------------------------------------------

            # first and second configuration
            r1j = T.sqrt(T.sum((rho1s[:, :] - r1[None, :])**2, axis=1))
            r2m = T.sqrt(T.sum((rho2s[:, :] - r2[None, :])**2, axis=1))
            rjk = T.sqrt(
                T.sum((rho1s[None, :, :] - rho1s[:, None, :])**2, axis=2))
            rmn = T.sqrt(
                T.sum((rho2s[None, :, :] - rho2s[:, None, :])**2, axis=2))

            # --------------------------------------------------
            # CHEMICAL SPECIES MASK
            # --------------------------------------------------

            # numerical kronecker
            def delta_alpha2(a1j, a2m):
                d = np.exp(-(a1j - a2m)**2 / (2 * 0.00001**2))
                return d

            # permutation 1

            delta_alphas12 = delta_alpha2(alpha_1[0], alpha_2[0])
            delta_alphasjm = delta_alpha2(alpha_j[:, None], alpha_m[None, :])
            delta_alphas_jmkn = delta_alphasjm[:, None, :,
                                               None] * delta_alphasjm[None, :,
                                                                      None, :]

            delta_perm1 = delta_alphas12 * delta_alphas_jmkn

            # permutation 3
            delta_alphas1m = delta_alpha2(alpha_1[0, None],
                                          alpha_m[None, :]).flatten()
            delta_alphasjn = delta_alpha2(alpha_j[:, None], alpha_n[None, :])
            delta_alphask2 = delta_alpha2(alpha_k[:, None],
                                          alpha_2[None, 0]).flatten()

            delta_perm3 = delta_alphas1m[None, None, :, None] * delta_alphasjn[:, None, None, :] * \
                delta_alphask2[None, :, None, None]

            # permutation 5
            delta_alphas1n = delta_alpha2(alpha_1[0, None],
                                          alpha_n[None, :]).flatten()
            delta_alphasj2 = delta_alpha2(alpha_j[:, None],
                                          alpha_2[None, 0]).flatten()
            delta_alphaskm = delta_alpha2(alpha_k[:, None], alpha_m[None, :])

            delta_perm5 = delta_alphas1n[None, None, None, :] * delta_alphaskm[None, :, :, None] * \
                delta_alphasj2[:, None, None, None]

            # --------------------------------------------------
            # BUILD THE KERNEL
            # --------------------------------------------------

            # Squared exp of differences
            se_1j2m = T.exp(-(r1j[:, None] - r2m[None, :])**2 / (2 * sig**2))
            se_jkmn = T.exp(
                -(rjk[:, :, None, None] - rmn[None, None, :, :])**2 /
                (2 * sig**2))
            se_jk2m = T.exp(-(rjk[:, :, None] - r2m[None, None, :])**2 /
                            (2 * sig**2))
            se_1jmn = T.exp(-(r1j[:, None, None] - rmn[None, :, :])**2 /
                            (2 * sig**2))

            # Kernel not summed (cyclic permutations)
            k1n = (se_1j2m[:, None, :, None] * se_1j2m[None, :, None, :] *
                   se_jkmn)
            k2n = (se_1jmn[:, None, :, :] * se_jk2m[:, :, None, :] *
                   se_1j2m[None, :, :, None])
            k3n = (se_1j2m[:, None, None, :] * se_jk2m[:, :, :, None] *
                   se_1jmn[None, :, :, :])

            # final shape is M1 M1 M2 M2

            ker_loc = k1n * delta_perm1 + k2n * delta_perm3 + k3n * delta_perm5

            # Faster version of cutoff (less calculations)
            cut_j = 0.5 * (1 + T.cos(np.pi * r1j / rc))
            cut_m = 0.5 * (1 + T.cos(np.pi * r2m / rc))

            cut_jk = cut_j[:, None] * cut_j[None, :] * 0.5 * (
                1 + T.cos(np.pi * rjk / rc))
            cut_mn = cut_m[:, None] * cut_m[None, :] * 0.5 * (
                1 + T.cos(np.pi * rmn / rc))

            # --------------------------------------------------
            # REMOVE DIAGONAL ELEMENTS
            # --------------------------------------------------

            # remove diagonal elements AND lower triangular ones from first configuration
            mask_jk = T.triu(T.ones_like(rjk)) - T.identity_like(rjk)

            # remove diagonal elements from second configuration
            mask_mn = T.ones_like(rmn) - T.identity_like(rmn)

            # Combine masks
            mask_jkmn = mask_jk[:, :, None, None] * mask_mn[None, None, :, :]

            # Apply mask and then apply cutoff functions
            ker_loc = ker_loc * mask_jkmn
            ker_loc = T.sum(ker_loc * cut_jk[:, :, None, None] *
                            cut_mn[None, None, :, :])

            ker_loc = T.exp(ker_loc / 20)

            # --------------------------------------------------
            # FINAL FUNCTIONS
            # --------------------------------------------------

            # energy energy kernel
            k_ee_fun = function([r1, r2, rho1, rho2, sig, theta, rc],
                                ker_loc,
                                on_unused_input='ignore')

            # energy force kernel
            k_ef_cut = T.grad(ker_loc, r2)
            k_ef_fun = function([r1, r2, rho1, rho2, sig, theta, rc],
                                k_ef_cut,
                                on_unused_input='ignore')

            # force force kernel
            k_ff_cut = T.grad(ker_loc, r1)
            k_ff_cut_der, updates = scan(
                lambda j, k_ff_cut, r2: T.grad(k_ff_cut[j], r2),
                sequences=T.arange(k_ff_cut.shape[0]),
                non_sequences=[k_ff_cut, r2])
            k_ff_fun = function([r1, r2, rho1, rho2, sig, theta, rc],
                                k_ff_cut_der,
                                on_unused_input='ignore')

            # Save the function that we want to use for multiprocessing
            # This is necessary because theano is a crybaby and does not want to access the
            # automatically stored compiled object from different processes
            with open(Mffpath / 'k3_ee_m.pickle', 'wb') as f:
                pickle.dump(k_ee_fun, f)
            with open(Mffpath / 'k3_ef_m.pickle', 'wb') as f:
                pickle.dump(k_ef_fun, f)
            with open(Mffpath / 'k3_ff_m.pickle', 'wb') as f:
                pickle.dump(k_ff_fun, f)

        else:
            print("Loading Kernels")
            with open(Mffpath / "k3_ee_m.pickle", 'rb') as f:
                k_ee_fun = pickle.load(f)
            with open(Mffpath / "k3_ef_m.pickle", 'rb') as f:
                k_ef_fun = pickle.load(f)
            with open(Mffpath / "k3_ff_m.pickle", 'rb') as f:
                k_ff_fun = pickle.load(f)

        # WRAPPERS (we don't want to plug the position of the central element every time)

        def km_ee(conf1, conf2, sig, theta, rc):
            """
            Many body kernel for energy-energy correlation

            Args:
                conf1 (array): first configuration.
                conf2 (array): second configuration.
                sig (float): lengthscale hyperparameter theta[0]
                theta (float): cutoff decay rate hyperparameter theta[1]
                rc (float): cutoff distance hyperparameter theta[2]

            Returns:
                kernel (float): scalar valued energy-energy many-body kernel

            """
            return k_ee_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta,
                            rc)

        def km_ef(conf1, conf2, sig, theta, rc):
            """
            Many body kernel for energy-force correlation

            Args:
                conf1 (array): first configuration.
                conf2 (array): second configuration.
                sig (float): lengthscale hyperparameter theta[0]
                theta (float): cutoff decay rate hyperparameter theta[1]
                rc (float): cutoff distance hyperparameter theta[2]

            Returns:
                kernel (array): 3x1 energy-force many-body kernel

            """
            return -k_ef_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig,
                             theta, rc)

        def km_ff(conf1, conf2, sig, theta, rc):
            """
            Many body kernel for force-force correlation

            Args:
                conf1 (array): first configuration.
                conf2 (array): second configuration.
                sig (float): lengthscale hyperparameter theta[0]
                theta (float): cutoff decay rate hyperparameter theta[1]
                rc (float): cutoff distance hyperparameter theta[2]

            Returns:
                kernel (matrix): 3x3 force-force many-body kernel

            """
            return k_ff_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta,
                            rc)

        logger.info("Ended compilation of theano many body kernels")

        return km_ee, km_ef, km_ff
Esempio n. 56
0
 def test_duplicate_updates(self):
     x, y = dmatrices('x', 'y')
     z = shared(numpy.ones((2, 3)))
     self.assertRaises(ValueError, theano.function, [x, y], [z],
             updates=[(z, (z + x + y)), (z, (z - x))])
    def __init__(self, params,correct,Xinfo, samples = 500,batch_size=None):
        ker = kernel()
        mmd = MMD()
        self.samples = samples
        self.params =  params
        self.batch_size=batch_size
        self.Xlabel_value=Xinfo["Xlabel_value"]
        self.Weight_value=Xinfo["Weight_value"]
        
        # file where the model is saved
        model_file_name = 'model_MMD_kernel' + '.save'
        # load a previously built model if one exists
        try:
            print ('Trying to load model...')
            with open(model_file_name, 'rb') as file_handle:
                obj = pickle.load(file_handle)
                self.f, self.g= obj
                print ('Loaded!')
            return
        except:
            print ('Failed. Creating a new model...')
        
        X,Y,X_test,m,S_b,mu,Sigma_b,Z,eps_NQ,eps_M =\
        T.dmatrices('X','Y','X_test','m','S_b','mu','Sigma_b','Z','eps_NQ','eps_M')
        
        Xlabel=T.dmatrix('Xlabel')
        Zlabel=T.dmatrix('Zlabel')
        
        Zlabel_T=T.exp(Zlabel)/T.sum(T.exp(Zlabel),1)[:,None]  # the labels are probabilities, so they are positive and normalized
        
        Weight=T.dmatrix('Weight')
        
        lhyp = T.dvector('lhyp')
        ls=T.dvector('ls')
        ga=T.dvector('ga')
        
        (M, D), N, Q = Z.shape, X.shape[0], X.shape[1]

        
        # constrain the variables to positive values
        beta = T.exp(ls)
        gamma=T.exp(ga[0])
        #beta=T.exp(lhyp[0])
        sf2, l = T.exp(lhyp[0]), T.exp(lhyp[1:1+Q])
        
        S=T.exp(S_b)
        #Sigma=T.exp(self.Sigma_b)
        
        # for x no square root is needed, since its covariance is diagonal
        # u is not diagonal, so a triangular matrix has to be built, e.g. via a Cholesky decomposition
        Sigma = T.tril(Sigma_b - T.diag(T.diag(Sigma_b)) + T.diag(T.exp(T.diag(Sigma_b))))
        
        # rescaling
        mu_scaled, Sigma_scaled = sf2**0.5 * mu, sf2**0.5 * Sigma
        
        Xtilda = m + S * eps_NQ
        U = mu_scaled+Sigma_scaled.dot(eps_M)

        print ('Setting up cache...')
        
        Kmm = ker.RBF(sf2, l, Z)
        Kmm=mmd.MMD_kenel_Xonly(gamma,Zlabel_T,Kmm,Weight)
        KmmInv = sT.matrix_inverse(Kmm) 
        #KmmDet=theano.sandbox.linalg.det(Kmm)
        
        #KmmInv_cache = sT.matrix_inverse(Kmm)
        #self.fKmm = theano.function([Z, lhyp], Kmm, name='Kmm')
        #self.f_KmmInv = theano.function([Z, lhyp], KmmInv_cache, name='KmmInv_cache')
        # note: this compiles the function KmmInv_cache with Z and lhyp as arguments,
        # i.e. the matrix inverse becomes a function of Z and the hyperparameters
        #self.update_KmmInv_cache()  # actually computes KmmInv by plugging in numerical values
        # builds the derivative functions of the inverse matrix
        
        #self.dKmm_d = {'Z': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), Z), name='dKmm_dZ'),
        #               'lhyp': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), lhyp), name='dKmm_dlhyp')}

        
        print ('Modeling...')
        
        Kmn = ker.RBF(sf2,l,Z,Xtilda)
        Kmn=mmd.MMD_kenel_ZX(gamma,Zlabel_T,Xlabel,Kmn,Weight)
        
        Knn = ker.RBF(sf2,l,Xtilda,Xtilda)
        Knn=mmd.MMD_kenel_Xonly(gamma,Xlabel,Knn,Weight)
        
        Ktilda=Knn-T.dot(Kmn.T,T.dot(KmmInv,Kmn))
        
        Kinterval=T.dot(KmmInv,Kmn)
              
        mean_U=T.dot(Kinterval.T,U)
        betaI=T.diag(T.dot(Xlabel,beta))
        Covariance = betaI       
        
        LL = (self.log_mvn(X, mean_U, Covariance) - 0.5*T.sum(T.dot(betaI,Ktilda)))*correct              
        KL_X = -self.KLD_X(m,S)*correct
        KL_U = -self.KLD_U(mu_scaled , Sigma_scaled , Kmm,KmmInv)
        
        print ('Compiling model ...')        


        inputs = {'X': X, 'Z': Z, 'm': m, 'S_b': S_b, 'mu': mu, 'Sigma_b': Sigma_b, 'lhyp': lhyp, 'ls': ls, 
            'eps_M': eps_M, 'eps_NQ': eps_NQ,'ga':ga,'Zlabel':Zlabel,'Weight':Weight,'Xlabel':Xlabel}
        
        z = 0.0*sum([T.sum(v) for v in inputs.values()]) # solve a bug with derivative wrt inputs not in the graph
        
        self.f = {n: theano.function(list(inputs.values()), f+z, name=n, on_unused_input='ignore')\
                  for n,f in zip(['X', 'U', 'LL', 'KL_U', 'KL_X'], [X, U, LL, KL_U, KL_X])}
        
        
        wrt = {'Z': Z, 'm': m, 'S_b': S_b, 'mu': mu, 'Sigma_b': Sigma_b, 'lhyp': lhyp, 'ls': ls,'ga':ga,'Zlabel':Zlabel}
        self.g = {vn: {gn: theano.function(list(inputs.values()), T.grad(gv+z, vv), name='d'+gn+'_d'+vn, 
            on_unused_input='ignore') for gn,gv in zip(['LL', 'KL_U', 'KL_X'], [LL, KL_U, KL_X])} for vn, vv in wrt.items()}

        with open(model_file_name, 'wb') as file_handle:
            print ('Saving model...')
            sys.setrecursionlimit(10000)
            pickle.dump([self.f, self.g], file_handle, protocol=pickle.HIGHEST_PROTOCOL)
    def __init__(self, params,correct, samples = 500,batch_size=None):
        ker = kernel()
        self.samples = samples
        self.params =  params
        self.batch_size=batch_size
        
        # file where the model is saved
        model_file_name = 'model2' + '.save'
        # load a previously built model if one exists
        try:
            print ('Trying to load model...')
            with open(model_file_name, 'rb') as file_handle:
                obj = pickle.load(file_handle)
                self.f, self.g= obj
                print ('Loaded!')
            return
        except:
            print ('Failed. Creating a new model...')
        
        X,Y,X_test,mu,Sigma_b,Z,eps_NQ,eps_M =\
        T.dmatrices('X','Y','X_test','mu','Sigma_b','Z','eps_NQ','eps_M')
        
        Wx, Ws, Wu=\
        T.dmatrices('Wx', 'Ws', 'Wu')

        bx, bs, bu=\
        T.dvectors('bx', 'bs', 'bu')

        gamma_x,beta_x,gamma_u,beta_u,gamma_s,beta_s=\
        T.dvectors("gamma_x","beta_x","gamma_u","beta_u","gamma_s","beta_s")
    
        lhyp = T.dvector('lhyp')
        ls=T.dvector('ls')
        
        (M, D), N, Q = Z.shape, X.shape[0], X.shape[1]

        
        # constrain the variables to positive values
        beta = T.exp(ls[0])
        #beta=T.exp(lhyp[0])
        sf2, l = T.exp(lhyp[0]), T.exp(lhyp[1:1+Q])
        
        #Sigma=T.exp(self.Sigma_b)
        
        # for x no square root is needed, since its covariance is diagonal
        # u is not diagonal, so a triangular matrix has to be built, e.g. via a Cholesky decomposition
        Sigma = T.tril(Sigma_b - T.diag(T.diag(Sigma_b)) + T.diag(T.exp(T.diag(Sigma_b))))
        
        # rescaling
        mu_scaled, Sigma_scaled = sf2**0.5 * mu, sf2**0.5 * Sigma
        
        # generate the hidden layers
        out1=self.neural_net_predict(Wx,bx,gamma_x,beta_x,X)
        m=self.neural_net_predict(Wu,bu,gamma_u,beta_u,out1)
        S=self.neural_net_predict(Ws,bs,gamma_s,beta_s,out1)
        #outputs1 = T.dot(X,Wx) + bx
        #m = T.dot(out1,Wu) + bu
        #S=T.dot(out1,Ws) + bs
                 
        S=T.exp(S)
        S=T.sqrt(S)
        
        Xtilda = m+S*eps_NQ
        U = mu_scaled+Sigma_scaled.dot(eps_M)

        print ('Setting up cache...')
        
        Kmm = ker.RBF(sf2, l, Z)
        KmmInv = sT.matrix_inverse(Kmm) 
        #KmmDet=theano.sandbox.linalg.det(Kmm)
        
        #KmmInv_cache = sT.matrix_inverse(Kmm)
        #self.fKmm = theano.function([Z, lhyp], Kmm, name='Kmm')
        #self.f_KmmInv = theano.function([Z, lhyp], KmmInv_cache, name='KmmInv_cache')
        # note: this compiles the function KmmInv_cache with Z and lhyp as arguments,
        # i.e. the matrix inverse becomes a function of Z and the hyperparameters
        #self.update_KmmInv_cache()  # actually computes KmmInv by plugging in numerical values
        # builds the derivative functions of the inverse matrix
        
        #self.dKmm_d = {'Z': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), Z), name='dKmm_dZ'),
        #               'lhyp': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), lhyp), name='dKmm_dlhyp')}

        
        print ('Modeling...')
        
        Kmn = ker.RBF(sf2,l,Z,Xtilda)
        Knn = ker.RBF(sf2,l,Xtilda,Xtilda)
        
        Ktilda=Knn-T.dot(Kmn.T,T.dot(KmmInv,Kmn))
        
        Kinterval=T.dot(KmmInv,Kmn)
              
        mean_U=T.dot(Kinterval.T,U)
        Covariance = beta       
        
        LL = (self.log_mvn(X, mean_U, Covariance) - 0.5*beta*T.sum((T.eye(N)*Ktilda)))*correct      
        KL_X = -self.KLD_X(m,S)*correct
        KL_U = -self.KLD_U(mu_scaled , Sigma_scaled , Kmm,KmmInv)
        
        print ('Compiling model ...')        

        inputs = {'X': X, 'Z': Z,'mu': mu, 'Sigma_b': Sigma_b, 'lhyp': lhyp, 'ls': ls, 'eps_M': eps_M, 'eps_NQ': eps_NQ,\
                  "Wx":Wx, "bx":bx, "Wu":Wu,"bu":bu, "Ws":Ws, "bs":bs,\
              "gamma_x":gamma_x,"beta_x":beta_x,"gamma_u":gamma_u,"beta_u":beta_u,"gamma_s":gamma_s,"beta_s":beta_s}
        
        z = 0.0*sum([T.sum(v) for v in inputs.values()]) # solve a bug with derivative wrt inputs not in the graph
        
        self.f = {n: theano.function(list(inputs.values()), f+z, name=n, on_unused_input='ignore')\
                  for n,f in zip(['Xtilda','U', 'LL', 'KL_U', 'KL_X'], [Xtilda,U, LL, KL_U, KL_X])}
        
        
        wrt = {'Z': Z,'mu': mu, 'Sigma_b': Sigma_b, 'lhyp': lhyp, 'ls': ls, "Wx":Wx, "bx":bx, "Wu":Wu,"bu":bu, "Ws":Ws, "bs":bs,\
              "gamma_x":gamma_x,"beta_x":beta_x,"gamma_u":gamma_u,"beta_u":beta_u,"gamma_s":gamma_s,"beta_s":beta_s}
        self.g = {vn: {gn: theano.function(list(inputs.values()), T.grad(gv+z, vv), name='d'+gn+'_d'+vn, 
            on_unused_input='ignore') for gn,gv in zip(['LL', 'KL_U', 'KL_X'], [LL, KL_U, KL_X])} for vn, vv in wrt.items()}

        with open(model_file_name, 'wb') as file_handle:
            print ('Saving model...')
            sys.setrecursionlimit(2000)
            pickle.dump([self.f, self.g], file_handle, protocol=pickle.HIGHEST_PROTOCOL)
Esempio n. 59
0
    def __init__(self, params, sx2 = 1, linear_model = False, samples = 20, use_hat = False):
        ker, self.samples, self.params, self.KmmInv  = kernel(), samples, params, {}
        self.use_hat = use_hat

        model_file_name = 'model' + ('_hat' if use_hat else '') + ('_linear' if linear_model else '') + '.save'

        try:
            print 'Trying to load model...'
            with open(model_file_name, 'rb') as file_handle:
                obj = cPickle.load(file_handle)
                self.f, self.g, self.f_Kmm, self.f_KmmInv, self.dKmm_d = obj
                self.update_KmmInv_cache()
                print 'Loaded!'
            return
        except:
            print 'Failed. Creating a new model...'

        Y, Z, m, ls, mu, lL, eps_MK, eps_NQ, eps_NK, KmmInv = T.dmatrices('Y', 'Z', 'm', 'ls', 'mu', 
            'lL', 'eps_MK', 'eps_NQ', 'eps_NK', 'KmmInv')
        lhyp = T.dvector('lhyp')
        (M, K), N, Q = mu.shape, m.shape[0], Z.shape[1]
        s, sl2, sf2, l = T.exp(ls), T.exp(lhyp[0]), T.exp(lhyp[1]), T.exp(lhyp[2:2+Q])
        L = T.tril(lL - T.diag(T.diag(lL)) + T.diag(T.exp(T.diag(lL))))
        
        print 'Setting up cache...'
        Kmm = ker.RBF(sf2, l, Z) if not linear_model else ker.LIN(sl2, Z)
        KmmInv_cache = sT.matrix_inverse(Kmm)
        self.f_Kmm = theano.function([Z, lhyp], Kmm, name='Kmm')
        self.f_KmmInv = theano.function([Z, lhyp], KmmInv_cache, name='KmmInv_cache')
        self.update_KmmInv_cache()
        self.dKmm_d = {'Z': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), Z), name='dKmm_dZ'),
                       'lhyp': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), lhyp), name='dKmm_dlhyp')}

        print 'Setting up model...'
        if not self.use_hat:
            mu_scaled, L_scaled = sf2**0.5 * mu, sf2**0.5 * L
            X = m + s * eps_NQ
            U = mu_scaled + L_scaled.dot(eps_MK)
            Kmn = ker.RBF(sf2, l, Z, X) if not linear_model else ker.LIN(sl2, Z, X)
            Knn = ker.RBFnn(sf2, l, X) if not linear_model else ker.LINnn(sl2, X)
            A = KmmInv.dot(Kmn)
            B = Knn - T.sum(Kmn * KmmInv.dot(Kmn), 0)
            F = A.T.dot(U) + T.maximum(B, 1e-16)[:,None]**0.5 * eps_NK
            F = T.concatenate((T.zeros((N,1)), F), axis=1)
            S = T.nnet.softmax(F)
            LS = T.sum(T.log(T.maximum(T.sum(Y * S, 1), 1e-16)))
            if not linear_model:
                KL_U = -0.5 * (T.sum(KmmInv.T * T.sum(mu_scaled[:,None,:]*mu_scaled[None,:,:], 2))
                        + K * (T.sum(KmmInv.T * L_scaled.dot(L_scaled.T)) - M - 2.0*T.sum(T.log(T.diag(L_scaled)))
                               + 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm))))))
            else:
                KL_U = 0
            #KL_U = -0.5 * T.sum(T.sum(mu_scaled * KmmInv.dot(mu_scaled), 0) + T.sum(KmmInv * L_scaled.dot(L_scaled.T)) - M
            #                    - 2.0*T.sum(T.log(T.diag(L_scaled))) + 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm))))) if not linear_model else 0
        else:
            # mu_scaled, L_scaled = mu / sf2**0.5, L / sf2**0.5
            mu_scaled, L_scaled = mu / sf2, L / sf2
            X = m + s * eps_NQ
            U = mu_scaled + L_scaled.dot(eps_MK)
            Kmn = ker.RBF(sf2, l, Z, X) if not linear_model else ker.LIN(sl2, Z, X)
            Knn = ker.RBFnn(sf2, l, X) if not linear_model else ker.LINnn(sl2, X)
            B = Knn - T.sum(Kmn * KmmInv.dot(Kmn), 0)
            F = Kmn.T.dot(U) + T.maximum(B, 1e-16)[:,None]**0.5 * eps_NK
            F = T.concatenate((T.zeros((N,1)), F), axis=1)
            S = T.nnet.softmax(F)
            LS = T.sum(T.log(T.maximum(T.sum(Y * S, 1), 1e-16)))
            if not linear_model:
                KL_U = -0.5 * (T.sum(Kmm.T * T.sum(mu_scaled[:,None,:]*mu_scaled[None,:,:], 2))
                        + K * (T.sum(Kmm.T * L_scaled.dot(L_scaled.T)) - M - 2.0*T.sum(T.log(T.diag(L_scaled)))
                               - 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm))))))
            else:
                KL_U = 0

        KL_X_all = -0.5 * T.sum((m**2.0 + s**2.0)/sx2 - 1.0 - 2.0*ls + T.log(sx2), 1)
        KL_X = T.sum(KL_X_all)

        print 'Compiling...'
        inputs = {'Y': Y, 'Z': Z, 'm': m, 'ls': ls, 'mu': mu, 'lL': lL, 'lhyp': lhyp, 'KmmInv': KmmInv, 
            'eps_MK': eps_MK, 'eps_NQ': eps_NQ, 'eps_NK': eps_NK}
        z = 0.0*sum([T.sum(v) for v in inputs.values()]) # solve a bug with derivative wrt inputs not in the graph
        f = zip(['X', 'U', 'S', 'LS', 'KL_U', 'KL_X', 'KL_X_all'], [X, U, S, LS, KL_U, KL_X, KL_X_all])
        self.f = {n: theano.function(inputs.values(), f+z, name=n, on_unused_input='ignore') for n,f in f}
        g = zip(['LS', 'KL_U', 'KL_X'], [LS, KL_U, KL_X])
        wrt = {'Z': Z, 'm': m, 'ls': ls, 'mu': mu, 'lL': lL, 'lhyp': lhyp, 'KmmInv': KmmInv}
        self.g = {vn: {gn: theano.function(inputs.values(), T.grad(gv+z, vv), name='d'+gn+'_d'+vn, 
            on_unused_input='ignore') for gn,gv in g} for vn, vv in wrt.iteritems()}

        with open(model_file_name, 'wb') as file_handle:
            print 'Saving model...'
            sys.setrecursionlimit(2000)
            cPickle.dump([self.f, self.g, self.f_Kmm, self.f_KmmInv, self.dKmm_d], file_handle, protocol=cPickle.HIGHEST_PROTOCOL)