Beispiel #1
0
    def createObjectiveFunction(self):
        '''
        Build the objective (negative ELBO) and compile its Theano functions.

        The compiled functions take four symbolic inputs, in this order:
          X  data matrix
          y  target column vector
          u  random noise passed to the simulator
          v  standard-normal draws for the reparametrization trick

        Reads self.params (mu = params[0], logSigma = params[1]),
        self.sigma_e, self.dimTheta, self.m, self.Lu and
        self.regression_simulator.

        Sets:
          self.lowerboundfunction  -> value of the objective (-ELBO)
          self.gradientfunction    -> gradients of the objective w.r.t.
                                      every entry of self.params
        '''
        X, u = T.dmatrices("X", "u")
        # The simulator output is built symbolically below, so no separate
        # placeholder for it is declared here.
        y, v = T.dcols("y", "v")

        mu = self.params[0]
        logSigma = self.params[1]
        # The noise log-scale is a fresh shared variable set to log(sigma_e);
        # it is not an entry of self.params, so no gradient is taken for it.
        logLambda = sharedX(np.log(self.sigma_e), name='logLambda')

        # Closed-form negative KL term of the variational posterior
        # (presumably against a standard-normal prior -- confirm).
        negKL = 0.5*self.dimTheta+0.5*T.sum(2*logSigma - mu ** 2 - T.exp(logSigma) ** 2)
        f = self.regression_simulator(X,u,v,mu,logSigma)

        # Gaussian log-likelihood; the squared-error term is divided by
        # self.Lu.
        logLike = -self.m*(0.5 * np.log(2 * np.pi) + logLambda)-0.5*T.sum((y-f)**2)/(T.exp(logLambda)**2)/self.Lu

        elbo = (negKL + logLike)
        # The objective is the negated bound.
        obj = -elbo
        self.lowerboundfunction = th.function([X, y, u, v], obj, on_unused_input='ignore')
        derivatives = T.grad(obj,self.params)
        self.gradientfunction = th.function([X,y,u,v], derivatives, on_unused_input='ignore')
Beispiel #2
0
    def createGradientFunctions(self):
        """Build the symbolic regression objective and compile Theano helpers.

        Reads self.dimTheta and self.m.

        Sets self.negKL, self.f, self.logLike, self.gradientfunction,
        self.lowerboundfunction (all compiled with th.function) and
        self.optimizer (a BatchGradientDescent instance minimising -logp).
        """
        # Create the symbolic inputs.
        X = T.dmatrices("X")
        # NOTE(review): the symbolic mu/logSigma made here are replaced by
        # shared variables on the next two lines, f is rebuilt further down,
        # and R is never used in this method.
        mu, logSigma, u, v, f, R = T.dcols("mu", "logSigma", "u", "v", "f", "R")
        # Randomly initialised shared parameters of shape (dimTheta, 1).
        mu = sharedX( np.random.normal(10, 10, (self.dimTheta, 1)), name='mu') 
        logSigma = sharedX(np.random.uniform(0, 4, (self.dimTheta, 1)), name='logSigma')
        # NOTE(review): this shared logLambd is discarded immediately; the
        # next line rebinds the name to a symbolic dmatrix whose broadcast
        # pattern is set to [1, 1].
        logLambd = sharedX(np.matrix(np.random.uniform(0, 10)),name='logLambd')
        logLambd = T.patternbroadcast(T.dmatrix("logLambd"),[1,1])
        # Negative KL term built from mu and logSigma.
        negKL = 0.5 * T.sum(1 + 2*logSigma - mu ** 2 - T.exp(logSigma) ** 2)
        # Reparametrised sample: theta = mu + exp(logSigma) * v.
        theta = mu+T.exp(logSigma)*v
        W=theta
        # First column of X is the target; the remaining columns are the
        # regressors fed to the linear simulator.
        y=X[:,0]
        X_sim=X[:,1:]
        f = (T.dot(X_sim,W)+u).flatten()
        
        gradvariables = [mu, logSigma, logLambd]
        
        
        # Gaussian log-likelihood of y around f with log-scale logLambd.
        logLike = T.sum(-(0.5 * np.log(2 * np.pi) + logLambd) - 0.5 * ((y-f)/(T.exp(logLambd)))**2)

        # Bound divided by self.m.
        logp = (negKL + logLike)/self.m

        # Symbolic objective handed to the optimiser (despite the name this
        # is an expression, not an optimiser object).
        optimizer = -logp
        
        # NOTE(review): mu and logSigma are shared variables at this point;
        # confirm that Theano accepts them as explicit function inputs here
        # and in the gradvariables-based calls below.
        self.negKL = th.function([mu, logSigma], negKL, on_unused_input='ignore')
        self.f = th.function(gradvariables + [X,u,v], f, on_unused_input='ignore')
        self.logLike = th.function(gradvariables + [X, u, v], logLike,on_unused_input='ignore')
        derivatives = T.grad(logp,gradvariables)
        # The bound itself is appended as the last output of the gradient
        # function.
        derivatives.append(logp)

        self.gradientfunction = th.function(gradvariables + [X, u, v], derivatives, on_unused_input='ignore')
        self.lowerboundfunction = th.function(gradvariables + [X, u, v], logp, on_unused_input='ignore')

        # NOTE(review): gradvariables contains the non-shared logLambd;
        # verify that BatchGradientDescent supports that in `params`.
        self.optimizer = BatchGradientDescent(objective=optimizer, params=gradvariables,inputs = [X,u,v],conjugate=True,max_iter=1)
Beispiel #3
0
    def createObjectiveFunction(self):
        '''
        Construct the negative-ELBO objective and compile it with Theano.

        Symbolic inputs of both compiled functions, in order:
          X  data matrix
          y  target column vector
          u  random noise for the simulator
          v  standard-normal noise for the reparametrization trick

        Uses self.params[0] as mu and self.params[1] as logSigma, plus
        self.sigma_e, self.dimTheta, self.m, self.Lu and
        self.regression_simulator.

        Side effects: assigns self.lowerboundfunction (evaluates the
        objective, i.e. minus the bound) and self.gradientfunction
        (evaluates the gradients of the objective w.r.t. self.params).
        '''
        X, u = T.dmatrices("X", "u")
        # No placeholder is declared for the simulator output: it is
        # produced by self.regression_simulator further down.
        y, v = T.dcols("y", "v")

        mu = self.params[0]
        logSigma = self.params[1]
        # Noise log-scale: a new shared variable initialised to
        # log(sigma_e). It is not part of self.params, hence not
        # differentiated below.
        logLambda = sharedX(np.log(self.sigma_e), name='logLambda')

        # Closed-form negative KL term (presumably w.r.t. a standard-normal
        # prior -- confirm).
        negKL = 0.5 * self.dimTheta + 0.5 * T.sum(2 * logSigma - mu**2 -
                                                  T.exp(logSigma)**2)
        f = self.regression_simulator(X, u, v, mu, logSigma)

        # Gaussian log-likelihood with the squared error scaled by 1/self.Lu.
        logLike = -self.m * (0.5 * np.log(2 * np.pi) +
                             logLambda) - 0.5 * T.sum(
                                 (y - f)**2) / (T.exp(logLambda)**2) / self.Lu

        elbo = (negKL + logLike)
        # Minimisation target: the negated bound.
        obj = -elbo
        self.lowerboundfunction = th.function([X, y, u, v],
                                              obj,
                                              on_unused_input='ignore')
        derivatives = T.grad(obj, self.params)
        self.gradientfunction = th.function([X, y, u, v],
                                            derivatives,
                                            on_unused_input='ignore')
    def createGradientFunctions(self):
        """Build the auto-encoder graph and compile its Theano functions.

        Reads self.continuous to choose between the Gaussian decoder
        (softplus hidden layer, tanh mean, learned log-sigma) and the
        Bernoulli decoder (tanh hidden layer, sigmoid output).

        Sets:
          self.get_z               -> latent sample z for (params, x, eps)
          self.generate            -> decoder mean for an externally
                                      supplied latent column z1
          self.predict             -> decoder mean for the encoded input
          self.gradientfunction    -> gradients of the bound w.r.t. the
                                      parameters, with the bound appended
                                      as the last output
          self.lowerboundfunction  -> the bound (logpxz + prior)

        In the non-continuous case the decoder mean is the sigmoid output,
        so generate/predict are available in both modes.
        """
        #Create the Theano variables
        W1, W2, W3, W4, W5, W6, x, eps = T.dmatrices(
            "W1", "W2", "W3", "W4", "W5", "W6", "x", "eps")
        #Create biases as cols so they can be broadcasted for minibatches
        b1, b2, b3, b4, b5, b6 = T.dcols("b1", "b2", "b3", "b4", "b5", "b6")
        # Latent column supplied by the caller of self.generate.
        z1 = T.col("z1")

        # Both data modes use the same tanh encoder layer.
        h_encoder = T.tanh(T.dot(W1, x) + b1)

        mu_encoder = T.dot(W2, h_encoder) + b2
        log_sigma_encoder = 0.5 * (T.dot(W3, h_encoder) + b3)

        #Find the hidden variable z
        z = mu_encoder + T.exp(log_sigma_encoder) * eps

        prior = 0.5 * T.sum(1 + 2 * log_sigma_encoder - mu_encoder**2 -
                            T.exp(2 * log_sigma_encoder))

        #Set up decoding layer
        if self.continuous:
            def decode(latent):
                # Return (hidden activation, decoder mean) for a latent input.
                hidden = T.nnet.softplus(T.dot(W4, latent) + b4)
                return hidden, T.tanh(T.dot(W5, hidden) + b5)

            h_decoder, mu_decoder = decode(z)
            _, mu_dec = decode(z1)
            log_sigma_decoder = 0.5 * (T.dot(W6, h_decoder) + b6)
            logpxz = T.sum(-(0.5 * np.log(2 * np.pi) + log_sigma_decoder) -
                           0.5 *
                           ((x - mu_decoder) / T.exp(log_sigma_decoder))**2)
            gradvariables = [W1, W2, W3, W4, W5, W6, b1, b2, b3, b4, b5, b6]
        else:
            def decode(latent):
                # Return (hidden activation, Bernoulli mean) for a latent input.
                hidden = T.tanh(T.dot(W4, latent) + b4)
                return hidden, T.nnet.sigmoid(T.dot(W5, hidden) + b5)

            # Previously mu_decoder / mu_dec were left undefined in this
            # branch, which made the function compilation below fail.
            _, mu_decoder = decode(z)
            _, mu_dec = decode(z1)
            logpxz = -T.nnet.binary_crossentropy(mu_decoder, x).sum()
            gradvariables = [W1, W2, W3, W4, W5, b1, b2, b3, b4, b5]
        logp = logpxz + prior

        #Compute all the gradients
        derivatives = T.grad(logp, gradvariables)

        #Add the lowerbound so we can keep track of results
        derivatives.append(logp)

        self.get_z = th.function(gradvariables + [x, eps], z,
                                 on_unused_input='ignore')
        self.generate = th.function(gradvariables + [z1, x, eps], mu_dec,
                                    on_unused_input='ignore')
        self.predict = th.function(gradvariables + [x, eps], mu_decoder,
                                   on_unused_input='ignore')
        self.gradientfunction = th.function(gradvariables + [x, eps],
                                            derivatives,
                                            on_unused_input='ignore')
        self.lowerboundfunction = th.function(gradvariables + [x, eps], logp,
                                              on_unused_input='ignore')
    def createGradientFunctions(self):
        """Assemble the auto-encoder graph and compile three Theano functions.

        self.continuous selects the Gaussian decoder (softplus layers,
        sigmoid mean, learned log-sigma) or the Bernoulli decoder (tanh
        layers, sigmoid output scored with binary cross-entropy).

        Sets:
          self.gradientfunction    -> parameter gradients followed by the
                                      bound as the final output
          self.lowerboundfunction  -> the bound (logpxz + prior)
          self.zfunction           -> the sampled latent variable z

        Every compiled function takes the parameter list followed by
        x and eps.
        """
        # Symbolic weights and data matrices.
        W1, W2, W3, W4, W5, W6 = T.dmatrices("W1", "W2", "W3", "W4", "W5",
                                             "W6")
        x, eps = T.dmatrices("x", "eps")
        # Biases are columns so they broadcast across the minibatch.
        b1, b2, b3, b4, b5, b6 = T.dcols("b1", "b2", "b3", "b4", "b5", "b6")

        # Encoder: hidden layer, then mean and log-sigma heads.
        encoder_activation = T.nnet.softplus if self.continuous else T.tanh
        h_encoder = encoder_activation(T.dot(W1, x) + b1)
        mu_encoder = T.dot(W2, h_encoder) + b2
        log_sigma_encoder = 0.5 * (T.dot(W3, h_encoder) + b3)

        # Reparametrised latent sample.
        z = mu_encoder + T.exp(log_sigma_encoder) * eps

        prior_terms = (1 + 2 * log_sigma_encoder - mu_encoder**2 -
                       T.exp(2 * log_sigma_encoder))
        prior = 0.5 * T.sum(prior_terms)

        # Decoder and reconstruction log-likelihood.
        decoder_input = T.dot(W4, z) + b4
        if not self.continuous:
            h_decoder = T.tanh(decoder_input)
            y = T.nnet.sigmoid(T.dot(W5, h_decoder) + b5)
            logpxz = -T.nnet.binary_crossentropy(y, x).sum()
            gradvariables = [W1, W2, W3, W4, W5, b1, b2, b3, b4, b5]
        else:
            h_decoder = T.nnet.softplus(decoder_input)
            mu_decoder = T.nnet.sigmoid(T.dot(W5, h_decoder) + b5)
            log_sigma_decoder = 0.5 * (T.dot(W6, h_decoder) + b6)
            standardized = (x - mu_decoder) / T.exp(log_sigma_decoder)
            logpxz = T.sum(-(0.5 * np.log(2 * np.pi) + log_sigma_decoder) -
                           0.5 * standardized**2)
            gradvariables = [W1, W2, W3, W4, W5, W6, b1, b2, b3, b4, b5, b6]

        logp = logpxz + prior

        # Gradients of the bound, with the bound itself appended so callers
        # can track it.
        derivatives = T.grad(logp, gradvariables)
        derivatives.append(logp)

        def compile_output(output):
            # Each call gets a fresh input list: parameters, then x and eps.
            return th.function(gradvariables + [x, eps],
                               output,
                               on_unused_input='ignore')

        self.gradientfunction = compile_output(derivatives)
        self.lowerboundfunction = compile_output(logp)
        self.zfunction = compile_output(z)
Beispiel #6
0
    def createGradientFunctions(self):
        """Build the symbolic regression objective and compile Theano helpers.

        Reads self.dimTheta and self.m.

        Sets self.negKL, self.f, self.logLike, self.gradientfunction,
        self.lowerboundfunction (all compiled with th.function) and
        self.optimizer (a BatchGradientDescent instance minimising -logp).
        """
        # Create the symbolic inputs.
        X = T.dmatrices("X")
        # NOTE(review): the symbolic mu/logSigma made here are replaced by
        # shared variables just below, f is rebuilt further down, and R is
        # never used in this method.
        mu, logSigma, u, v, f, R = T.dcols("mu", "logSigma", "u", "v", "f",
                                           "R")
        # Randomly initialised shared parameters of shape (dimTheta, 1).
        mu = sharedX(np.random.normal(10, 10, (self.dimTheta, 1)), name='mu')
        logSigma = sharedX(np.random.uniform(0, 4, (self.dimTheta, 1)),
                           name='logSigma')
        # NOTE(review): this shared logLambd is discarded immediately; the
        # following statement rebinds the name to a symbolic dmatrix whose
        # broadcast pattern is set to [1, 1].
        logLambd = sharedX(np.matrix(np.random.uniform(0, 10)),
                           name='logLambd')
        logLambd = T.patternbroadcast(T.dmatrix("logLambd"), [1, 1])
        # Negative KL term built from mu and logSigma.
        negKL = 0.5 * T.sum(1 + 2 * logSigma - mu**2 - T.exp(logSigma)**2)
        # Reparametrised sample: theta = mu + exp(logSigma) * v.
        theta = mu + T.exp(logSigma) * v
        W = theta
        # First column of X is the target; the remaining columns are the
        # regressors fed to the linear simulator.
        y = X[:, 0]
        X_sim = X[:, 1:]
        f = (T.dot(X_sim, W) + u).flatten()

        gradvariables = [mu, logSigma, logLambd]

        # Gaussian log-likelihood of y around f with log-scale logLambd.
        logLike = T.sum(-(0.5 * np.log(2 * np.pi) + logLambd) - 0.5 *
                        ((y - f) / (T.exp(logLambd)))**2)

        # Bound divided by self.m.
        logp = (negKL + logLike) / self.m

        # Symbolic objective handed to the optimiser (despite the name this
        # is an expression, not an optimiser object).
        optimizer = -logp

        # NOTE(review): mu and logSigma are shared variables at this point;
        # confirm that Theano accepts them as explicit function inputs here
        # and in the gradvariables-based calls below.
        self.negKL = th.function([mu, logSigma],
                                 negKL,
                                 on_unused_input='ignore')
        self.f = th.function(gradvariables + [X, u, v],
                             f,
                             on_unused_input='ignore')
        self.logLike = th.function(gradvariables + [X, u, v],
                                   logLike,
                                   on_unused_input='ignore')
        derivatives = T.grad(logp, gradvariables)
        # The bound itself is appended as the last output of the gradient
        # function.
        derivatives.append(logp)

        self.gradientfunction = th.function(gradvariables + [X, u, v],
                                            derivatives,
                                            on_unused_input='ignore')
        self.lowerboundfunction = th.function(gradvariables + [X, u, v],
                                              logp,
                                              on_unused_input='ignore')

        # NOTE(review): gradvariables contains the non-shared logLambd;
        # verify that BatchGradientDescent supports that in `params`.
        self.optimizer = BatchGradientDescent(objective=optimizer,
                                              params=gradvariables,
                                              inputs=[X, u, v],
                                              conjugate=True,
                                              max_iter=1)
    def createGradientFunctions(self):
        """Build the mixture-encoder graph and compile its Theano functions.

        A component index is sampled from softmax(pi) and used to select a
        block of self.dimZ rows from W2/W3/b2/b3 for the encoder heads.

        Reads self.continuous, self.num_model and self.dimZ.

        Sets:
          self.gradientfunction     -> gradients of logpxz + prior w.r.t.
                                       the parameters (including pi), with
                                       logpxz appended as the last output
          self.lowerboundfunction   -> logpxz + prior
          self.hiddenstatefunction  -> the sampled latent variable z
        """
        #Create the Theano variables
        W1, W2, W3, W4, W5, W6, x, eps = T.dmatrices(
            "W1", "W2", "W3", "W4", "W5", "W6", "x", "eps")

        #Create biases as cols so they can be broadcasted for minibatches
        b1, b2, b3, b4, b5, b6, pi = T.dcols(
            "b1", "b2", "b3", "b4", "b5", "b6", "pi")

        if self.continuous:
            h_encoder = T.nnet.softplus(T.dot(W1, x) + b1)
        else:
            h_encoder = T.tanh(T.dot(W1, x) + b1)

        # Debug output kept from the original; the call form is valid on
        # both Python 2 and Python 3.
        print(type(pi))

        # Sample one mixture component with probabilities softmax(pi).
        rng = T.shared_randomstreams.RandomStreams(seed=124)
        i = rng.choice(size=(1,), a=self.num_model,
                       p=T.nnet.softmax(pi.T).T.flatten())

        # Row range of the selected component inside the stacked parameters.
        lo = i[0] * self.dimZ
        hi = (1 + i[0]) * self.dimZ

        mu_encoder = T.dot(W2[lo:hi], h_encoder) + b2[lo:hi]
        # NOTE(review): unlike the other encoder variants, the bias here is
        # added outside the 0.5 factor -- confirm this is intended.
        log_sigma_encoder = (0.5 * T.dot(W3[lo:hi], h_encoder)) + b3[lo:hi]

        z = mu_encoder + T.exp(log_sigma_encoder) * eps

        # Weighted sum of per-component terms, normalised by sum(exp(pi)).
        # NOTE(review): mu_encoder / log_sigma_encoder were already sliced
        # to the sampled component above, so slicing them again by k*dimZ
        # looks suspicious for k >= 1 -- verify the intended prior.
        prior = 0
        for k in range(self.num_model):
            start = k * self.dimZ
            stop = (1 + k) * self.dimZ
            prior += T.exp(pi[k][0]) * 0.5 * T.sum(
                1 + 2 * log_sigma_encoder[start:stop]
                - mu_encoder[start:stop]**2
                - T.exp(2 * log_sigma_encoder[start:stop]))
        prior /= T.sum(T.exp(pi))

        #Set up decoding layer
        if self.continuous:
            h_decoder = T.nnet.softplus(T.dot(W4, z) + b4)
            mu_decoder = T.nnet.sigmoid(T.dot(W5, h_decoder) + b5)
            log_sigma_decoder = 0.5 * (T.dot(W6, h_decoder) + b6)
            logpxz = T.sum(-(0.5 * np.log(2 * np.pi) + log_sigma_decoder) -
                           0.5 *
                           ((x - mu_decoder) / T.exp(log_sigma_decoder))**2)
            gradvariables = [W1, W2, W3, W4, W5, W6,
                             b1, b2, b3, b4, b5, b6, pi]
        else:
            h_decoder = T.tanh(T.dot(W4, z) + b4)
            y = T.nnet.sigmoid(T.dot(W5, h_decoder) + b5)
            logpxz = -T.nnet.binary_crossentropy(y, x).sum()
            gradvariables = [W1, W2, W3, W4, W5, b1, b2, b3, b4, b5, pi]

        logp = logpxz + prior

        #Compute all the gradients
        derivatives = T.grad(logp, gradvariables)

        # The reconstruction term (not the full bound) is appended as the
        # last output of the gradient function.
        derivatives.append(logpxz)

        self.gradientfunction = th.function(gradvariables + [x, eps],
                                            derivatives,
                                            on_unused_input='ignore')
        self.lowerboundfunction = th.function(gradvariables + [x, eps],
                                              logp,
                                              on_unused_input='ignore')
        self.hiddenstatefunction = th.function(gradvariables + [x, eps],
                                               z,
                                               on_unused_input='ignore')
Beispiel #8
0
    def createGradientFunctions(self):
        """Build a GP-augmented auto-encoder graph and compile its functions.

        Reads self.continuous_data, self.ker, self.dimZ, self.dimXf and
        self.dimf.

        Sets self.gradientfunction (gradients of the stochastic bound with
        the bound appended), self.lowerboundfunction (the bound) and
        self.zfunction (the latent variable z).

        NOTE(review): this method reads several names that are never
        assigned inside it (log_L_u, sf2, ell, X, X_u, K_fu, m_u, eps_u,
        eps_z, sigma_z, W6, b6, W8, b8). Presumably they are meant to be
        symbolic variables or come from an enclosing scope -- confirm
        before relying on this code.
        """
        #Create the Theano variables
        W1, W2, W3, W4, W5, W7, x, eps = T.dmatrices("W1", "W2", "W3", "W4",
                                                     "W5", "W7", "x", "eps")

        #Create biases as cols so they can be broadcasted for minibatches
        b1, b2, b3, b4, b5, b7 = T.dcols("b1", "b2", "b3", "b4", "b5", "b7")

        if self.continuous_data:
            h_encoder = T.nnet.softplus(T.dot(W1, x) + b1)
        else:
            h_encoder = T.tanh(T.dot(W1, x) + b1)

        mu_encoder = T.dot(W2, h_encoder) + b2
        log_sigma_encoder = 0.5 * (T.dot(W3, h_encoder) + b3)

        # Lower-triangular factor: strict lower part of log_L_u with the
        # diagonal replaced by its exponential.
        L_u = T.tril(log_L_u - T.diag(T.diag(log_L_u)) +
                     T.diag(T.exp(T.diag(log_L_u))))
        # To do: Better ways of parameterising the covariance (see: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.31.494&rep=rep1&type=pdf)

        #Compute GP objects
        K_ff = self.ker.RBF(sf2, ell, X)
        K_uu = self.ker.RBF(sf2, ell, X_u)
        K_uu_inv = nlinalg.matrix_inverse(K_uu)
        # NOTE(review): K_fu is used here but not computed in this method.
        L_f = slinalg.cholesky(K_ff - T.dot(K_fu, T.dot(K_uu_inv, K_fu.T)))
        # f_i make up the columns of f, similarly for m_u_i
        u = m_u + T.dot(L_u, eps_u)  #n_induce iid pseudo inducing sets
        f = T.dot(K_fu, T.dot(K_uu_inv, u)) + T.dot(L_f, X)

        #Find the hidden variable z
        # log_sigma_lhood = 0.5*(T.dot(W9,f) + b9) # the var GP maps to both mean *and* covariance
        sigma_var_lhood = sigma_z**2 * T.eye(self.dimZ)
        L_z = slinalg.cholesky(sigma_var_lhood)
        z = f + T.dot(L_z, eps_z)
        # z = mu_encoder + T.exp(log_sigma_encoder)*eps

        prior = 0.5 * T.sum(1 + 2 * log_sigma_encoder - mu_encoder**2 -
                            T.exp(2 * log_sigma_encoder))

        #Set up decoding layer
        if self.continuous_data:
            h_decoder = T.nnet.softplus(T.dot(W4, z) + b4)
            mu_decoder = T.nnet.sigmoid(T.dot(W5, h_decoder) + b5)
            log_sigma_decoder = 0.5 * (T.dot(W7, h_decoder) + b7)
            logpxz = T.sum(-(0.5 * np.log(2 * np.pi) + log_sigma_decoder) -
                           0.5 *
                           ((x - mu_decoder) / T.exp(log_sigma_decoder))**2)
            # NOTE(review): L_u in this list is a derived expression, not
            # one of the variables created above.
            gradvariables = [
                W1, W2, W3, W4, W5, W7, b1, b2, b3, b4, b5, b7, sf2, ell, X_u,
                m_u, L_u
            ]
        else:
            h_decoder = T.tanh(T.dot(W4, z) + b4)
            y = T.nnet.sigmoid(T.dot(W5, h_decoder) + b5)
            logpxz = -T.nnet.binary_crossentropy(y, x).sum()
            gradvariables = [
                W1, W2, W3, W4, W5, b1, b2, b3, b4, b5, sf2, ell, X_u, m_u, L_u
            ]

        #Set up auxiliary layer
        if self.continuous_data:
            # NOTE(review): W7/b7 are reused here after serving as the
            # decoder log-sigma parameters above -- confirm this is intended.
            h_auxiliary = T.nnet.softplus(T.dot(W6, [x, z]) + b6)
            mu_auxiliary = T.nnet.sigmoid(T.dot(W7, h_auxiliary) + b7)
            log_sigma_auxiliary = 0.5 * (T.dot(W8, h_auxiliary) + b8)
        else:
            # NOTE(review): mu_auxiliary, log_sigma_auxiliary and mu_decoder
            # are not assigned on this path although KL_qr below uses them.
            pass  #to do

        # NOTE(review): logp is not used below; the compiled functions use
        # stoch_bound instead.
        logp = logpxz + prior

        #Compute KL terms
        # KL_qp = -0.5*T.sum(1.0 + 2*log_sigma_lhood - f**2 - T.exp(2*log_sigma_lhood))
        KL_qp = 0.5 * (T.dot(f.T, f) +
                       T.trace(sigma_var_lhood + T.log(T.eye(self.dimZ)) -
                               T.log(sigma_var_lhood)) - self.dimZ)
        # NOTE(review): the quadratic form below mixes mu_encoder (left
        # factor) and mu_decoder (right factor) -- verify which is meant.
        KL_qr = 0.5 * (T.dot(
            (mu_auxiliary - mu_encoder).T,
            T.dot(T.diag(1.0 / T.exp(log_sigma_auxiliary)),
                  mu_auxiliary - mu_decoder)) + T.trace(
                      T.dot(T.diag(1.0 / T.exp(log_sigma_auxiliary)),
                            T.dot(L_u, L_u.T)) + log_sigma_auxiliary -
                      log_sigma_encoder) - self.dimXf - self.dimf)

        #Compute bound and all the gradients
        stoch_bound = logpxz - KL_qp - KL_qr
        derivatives = T.grad(stoch_bound, gradvariables)

        #Add the lowerbound so we can keep track of results
        derivatives.append(stoch_bound)

        self.gradientfunction = th.function(gradvariables +
                                            [x, eps_u, eps_z, X],
                                            derivatives,
                                            on_unused_input='ignore')
        self.lowerboundfunction = th.function(gradvariables +
                                              [x, eps_u, eps_z, X],
                                              stoch_bound,
                                              on_unused_input='ignore')
        self.zfunction = th.function(gradvariables + [x, eps_u, eps_z, X],
                                     z,
                                     on_unused_input='ignore')
Beispiel #9
0
    def createGradientFunctions(self):
        """Assemble the auto-encoder graph and compile its Theano functions.

        With self.continuous set, softplus layers and a Gaussian decoder are
        used; otherwise ReLU layers with a sigmoid output scored by binary
        cross-entropy.

        Sets:
          self.gradientfunction        -> gradients of the bound w.r.t. the
                                          parameters (bound not appended)
          self.lowerboundfunction      -> the bound (logpxz + KL term)
          self.latentvarfunction       -> [mu_encoder.T, log_sigma_encoder.T]
                                          (takes parameters and x only)
          self.recon_mnist             -> sigmoid output; only compiled
                                          when self.continuous is false
          self.reconstructionfunction  -> reconstruction log-likelihood
                                          summed over axis 0, transposed
        """
        def relu(a):
            # Rectifier written as 0.5 * (a + |a|).
            return 0.5 * (a + abs(a))

        # Symbolic weights and data matrices.
        W1, W2, W3, W4, W5, W6 = T.dmatrices("W1", "W2", "W3", "W4", "W5",
                                             "W6")
        x, eps = T.dmatrices("x", "eps")
        # Biases are columns so they broadcast across the minibatch.
        b1, b2, b3, b4, b5, b6 = T.dcols("b1", "b2", "b3", "b4", "b5", "b6")

        # Encoder hidden layer, then mean and log-sigma heads.
        encoder_input = T.dot(W1, x) + b1
        if self.continuous:
            h_encoder = T.nnet.softplus(encoder_input)
        else:
            h_encoder = relu(encoder_input)
        mu_encoder = T.dot(W2, h_encoder) + b2
        log_sigma_encoder = 0.5 * (T.dot(W3, h_encoder) + b3)

        # Reparametrised latent sample.
        z = mu_encoder + T.exp(log_sigma_encoder) * eps

        # Closed-form KL-based regulariser added to the reconstruction term.
        KL_divergence = 0.5 * T.sum(1 + 2 * log_sigma_encoder -
                                    mu_encoder**2 -
                                    T.exp(2 * log_sigma_encoder))

        # Decoder: total and per-column reconstruction log-likelihoods.
        decoder_input = T.dot(W4, z) + b4
        if self.continuous:
            h_decoder = T.nnet.softplus(decoder_input)
            mu_decoder = T.nnet.sigmoid(T.dot(W5, h_decoder) + b5)
            log_sigma_decoder = 0.5 * (T.dot(W6, h_decoder) + b6)
            log_density = (-(0.5 * np.log(2 * np.pi) + log_sigma_decoder) -
                           0.5 * ((x - mu_decoder) /
                                  T.exp(log_sigma_decoder))**2)
            logpxz = T.sum(log_density)
            logpxz_individual = T.sum(log_density, axis=0)
            gradvariables = [W1, W2, W3, W4, W5, W6, b1, b2, b3, b4, b5, b6]
        else:
            h_decoder = relu(decoder_input)
            y = T.nnet.sigmoid(T.dot(W5, h_decoder) + b5)
            cross_entropy = T.nnet.binary_crossentropy(y, x)
            logpxz_individual = -cross_entropy.sum(axis=0)
            logpxz = -cross_entropy.sum()
            gradvariables = [W1, W2, W3, W4, W5, b1, b2, b3, b4, b5]

        logp = logpxz + KL_divergence

        # Gradients only; the bound is exposed via lowerboundfunction
        # rather than appended to this list.
        derivatives = T.grad(logp, gradvariables)

        def compile_with_noise(output):
            # Inputs: parameters followed by x and eps.
            return th.function(gradvariables + [x, eps], output,
                               on_unused_input='ignore')

        self.gradientfunction = compile_with_noise(derivatives)
        self.lowerboundfunction = compile_with_noise(logp)
        self.latentvarfunction = th.function(
            gradvariables + [x],
            [mu_encoder.T, log_sigma_encoder.T],
            on_unused_input='ignore')

        # The sigmoid output only exists for the non-continuous decoder.
        if not self.continuous:
            self.recon_mnist = compile_with_noise(y)
        self.reconstructionfunction = compile_with_noise(logpxz_individual.T)