def get_opt_A(self, sn_trf, EPhiTPhi, XT_EPhi, K_MM):
     cholSigInv = sT.cholesky(EPhiTPhi + sn_trf * T.identity_like(K_MM))
     cholK_MM = sT.cholesky(K_MM + 1e-6 * T.identity_like(K_MM))
     invCholSigInv = sT.matrix_inverse(cholSigInv)
     invCholK_MM = sT.matrix_inverse(cholK_MM)
     InvSig = invCholSigInv.T.dot(invCholSigInv)
     InvK_MM = invCholK_MM.T.dot(invCholK_MM)
     Sig_EPhiT_X = InvSig.dot(XT_EPhi.T)
     return Sig_EPhiT_X, cholSigInv, cholK_MM, InvK_MM
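
The pattern above recovers the inverse of a symmetric positive-definite matrix from its Cholesky factor: if A = L L^T, then A^{-1} = L^{-T} L^{-1}. A minimal NumPy sketch of the same identity (the matrix A here is purely illustrative):

import numpy as np

A = np.array([[4.0, 1.0],
              [1.0, 3.0]])              # any symmetric positive-definite matrix
L = np.linalg.cholesky(A)               # lower-triangular factor, A = L @ L.T
L_inv = np.linalg.inv(L)
A_inv = L_inv.T @ L_inv                 # (L L^T)^{-1} = L^{-T} L^{-1}
assert np.allclose(A_inv, np.linalg.inv(A))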
Example #2
    def __init__(self, mu, sigma, random_state=None):
        super(MultivariateNormal, self).__init__(mu=mu, sigma=sigma)
        # XXX: The SDP-ness of sigma should be checked upon changes

        # ndim
        self.ndim_ = self.mu.shape[0]
        self.make_(self.ndim_, "ndim_func_", args=[])

        # pdf
        L = linalg.cholesky(self.sigma)
        sigma_det = linalg.det(self.sigma)  # XXX: compute from L instead
        sigma_inv = linalg.matrix_inverse(self.sigma)  # XXX: idem

        self.pdf_ = ((1. / T.sqrt(
            (2. * np.pi)**self.ndim_ * T.abs_(sigma_det))) *
                     T.exp(-0.5 * T.sum(T.mul(
                         T.dot(self.X - self.mu, sigma_inv), self.X - self.mu),
                                        axis=1))).ravel()
        self.make_(self.pdf_, "pdf")

        # -log pdf
        self.nll_ = -T.log(self.pdf_)  # XXX: for sure this can be better
        self.make_(self.nll_, "nll")

        # self.rvs_
        self.make_(T.dot(L, self.X.T).T + self.mu, "rvs_func_")
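
The XXX notes above suggest computing the determinant and the quadratic form from the Cholesky factor L rather than from sigma directly. A rough NumPy/SciPy sketch of that idea, outside the carl API (function and variable names here are illustrative):

import numpy as np
from scipy.linalg import cholesky, solve_triangular

def mvn_logpdf(X, mu, sigma):
    # X: (n, d) sample points, mu: (d,) mean, sigma: (d, d) covariance
    L = cholesky(sigma, lower=True)              # sigma = L @ L.T
    diff = X - mu
    z = solve_triangular(L, diff.T, lower=True)  # solves L z = (X - mu)^T
    quad = np.sum(z ** 2, axis=0)                # (x - mu)^T sigma^{-1} (x - mu) per row
    log_det = 2.0 * np.sum(np.log(np.diag(L)))   # log|sigma| from the factor
    d = mu.shape[0]
    return -0.5 * (d * np.log(2.0 * np.pi) + log_det + quad)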
Example #3
 def get_opt_A(self, sn_trf, EPhiTPhi, XT_EPhi):
     SigInv = EPhiTPhi + (sn_trf + 1e-6) * T.identity_like(EPhiTPhi)
     cholSigInv = sT.cholesky(SigInv)
     invCholSigInv = sT.matrix_inverse(cholSigInv)
     InvSig = invCholSigInv.T.dot(invCholSigInv)
     Sig_EPhiT_X = InvSig.dot(XT_EPhi.T)
     return Sig_EPhiT_X, cholSigInv
Example #4
File: normal.py Project: ibab/carl
    def __init__(self, mu, sigma, random_state=None):
        super(MultivariateNormal, self).__init__(mu=mu,
                                                 sigma=sigma,
                                                 random_state=random_state,
                                                 optimizer=None)
        # XXX: The SDP-ness of sigma should be checked upon changes

        # ndim
        self.ndim_ = self.mu.shape[0]
        self.make_(self.ndim_, "ndim_func_", args=[])

        # pdf
        L = linalg.cholesky(self.sigma)
        sigma_det = linalg.det(self.sigma)  # XXX: compute from L instead
        sigma_inv = linalg.matrix_inverse(self.sigma)  # XXX: idem

        self.pdf_ = (
            (1. / T.sqrt((2. * np.pi) ** self.ndim_ * T.abs_(sigma_det))) *
            T.exp(-0.5 * T.sum(T.mul(T.dot(self.X - self.mu,
                                           sigma_inv),
                                     self.X - self.mu),
                               axis=1))).ravel()
        self.make_(self.pdf_, "pdf")

        # -log pdf
        self.nnlf_ = -T.log(self.pdf_)  # XXX: for sure this can be better
        self.make_(self.nnlf_, "nnlf")

        # self.rvs_
        self.make_(T.dot(L, self.X.T).T + self.mu, "rvs_func_")
Example #5
 def get_opt_A(self, tau, EPhiTPhi, YT_EPhi):
     SigInv = EPhiTPhi + (tau**-1 + 1e-4) * T.identity_like(EPhiTPhi)
     cholTauSigInv = tau**0.5 * sT.cholesky(SigInv)
     invCholTauSigInv = sT.matrix_inverse(cholTauSigInv)
     tauInvSig = invCholTauSigInv.T.dot(invCholTauSigInv)
     Sig_EPhiT_Y = tau * tauInvSig.dot(YT_EPhi.T)
     return Sig_EPhiT_Y, tauInvSig, cholTauSigInv
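
Example #5 factors the precision tau out of the decomposition using the scaling identity chol(tau * A) = sqrt(tau) * chol(A). A quick NumPy check of that identity, for an illustrative positive tau and symmetric positive-definite A:

import numpy as np

tau = 2.5
A = np.array([[4.0, 1.0],
              [1.0, 3.0]])
assert np.allclose(np.linalg.cholesky(tau * A),
                   np.sqrt(tau) * np.linalg.cholesky(A))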
Example #6
    def get_model(self, X, Y, x_test):
        '''
        Gaussian Process Regression model.
        Reference: C.E. Rasmussen and C.K.I. Williams, "Gaussian Processes for Machine Learning", MIT Press, 2006

        Args:
            X: tensor matrix, training data
            Y: tensor matrix, training target
            x_test: tensor matrix, testing data
        
        Returns:
            K: prior cov matrix
            Ks: prior joint cov matrix
            Kss: prior cov matrix for testing data
            Posterior Distribution:
                alpha: alpha = inv(K)*(Y-m)
                sW: vector containing diagonal of sqrt(W)
                L: L = chol(sW*K*sW+eye(n))
            y_test_mu: predictive mean
            y_test_var: predictive variance
            fs2: predictive latent variance
        Note: the cov matrix inverse is computed through Cholesky factorization
        https://makarandtapaswi.wordpress.com/2011/07/08/cholesky-decomposition-for-matrix-inversion/
        '''
        # Compute GP prior distribution: mean and covariance matrices (eq 2.13, 2.14)
        K = self.covFunc(X, X, 'K')  # prior cov
        #m = T.mean(Y)*T.ones_like(Y) # prior mean
        m = self.mean * T.ones_like(Y)  # prior mean

        # Compute GP joint prior distribution between training and test (eq 2.18)
        Ks = self.covFunc(X, x_test, 'Ks')
        # Note: this is the test-data self-covariance matrix (mode='self_test').
        Kss = self.covFunc(x_test, x_test, 'Kss', mode='self_test')

        # Compute posterior distribution with noise: L,alpha,sW,and log_likelihood.
        sn2 = T.exp(2 * self.sigma_n)  # noise variance of likGauss
        L = sT.cholesky(K / sn2 + T.identity_like(K))
        sl = sn2
        alpha = T.dot(sT.matrix_inverse(L.T),
                      T.dot(sT.matrix_inverse(L), (Y - m))) / sl
        sW = T.ones_like(T.sum(K, axis=1)).reshape(
            (K.shape[0], 1)) / T.sqrt(sl)
        log_likelihood = T.sum(-0.5 * (T.dot((Y - m).T, alpha)) -
                               T.sum(T.log(T.diag(L))) -
                               X.shape[0] / 2 * T.log(2. * np.pi * sl))

        # Compute predictive distribution using the computed posterior distribution.
        fmu = m + T.dot(Ks.T, alpha)  # Prediction Mu fs|f, eq 2.25
        V = T.dot(sT.matrix_inverse(L),
                  T.extra_ops.repeat(sW, x_test.shape[0], axis=1) * Ks)
        fs2 = Kss - (T.sum(V * V, axis=0)).reshape(
            (1, V.shape[1])).T  # Prediction Sigma, eq 2.26
        fs2 = T.maximum(fs2, 0)  # clip negative variances from numerical noise
        #fs2 = T.sum(fs2,axis=1) # in case x has multiple dimensions

        y_test_mu = fmu
        y_test_var = fs2 + sn2

        return K, Ks, Kss, y_test_mu, y_test_var, log_likelihood, L, alpha, V, fs2, sW
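
For comparison, here is a compact NumPy version of the same predictive equations (Rasmussen & Williams, Algorithm 2.1, eqs. 2.25-2.26), assuming a zero prior mean and precomputed covariance blocks; it is a sketch with illustrative names, not a drop-in replacement for the Theano graph above:

import numpy as np
from scipy.linalg import cholesky, solve_triangular

def gp_predict(K, Ks, Kss_diag, Y, sn2):
    # K: (n, n) train covariance, Ks: (n, n*) train/test covariance,
    # Kss_diag: (n*,) test self-variances, Y: (n, 1) targets, sn2: noise variance
    L = cholesky(K + sn2 * np.eye(K.shape[0]), lower=True)
    alpha = solve_triangular(L.T, solve_triangular(L, Y, lower=True))  # (K + sn2*I)^{-1} Y
    mu = Ks.T @ alpha                                                  # eq. 2.25
    v = solve_triangular(L, Ks, lower=True)
    fs2 = Kss_diag - np.sum(v ** 2, axis=0)                            # eq. 2.26, latent variance
    return mu, fs2 + sn2                                               # add noise variance for y*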
Example #7
 def build_theano_models(self, algo, algo_params):
     epsilon = 1e-6
     kl = lambda mu, sig: sig+mu**2-TT.log(sig)
     X, y = TT.dmatrices('X', 'y')
     params = TT.dvector('params')
     a, b, c, l_F, F, l_FC, FC = self.unpack_params(params)
     sig2_n, sig_f = TT.exp(2*a), TT.exp(b)
     l_FF = TT.dot(X, l_F)+l_FC
     FF = TT.concatenate((l_FF, TT.dot(X, F)+FC), 1)
     Phi = TT.concatenate((TT.cos(FF), TT.sin(FF)), 1)
     Phi = sig_f*TT.sqrt(2./self.M)*Phi
     noise = TT.log(1+TT.exp(c))
     PhiTPhi = TT.dot(Phi.T, Phi)
     A = PhiTPhi+(sig2_n+epsilon)*TT.identity_like(PhiTPhi)
     L = Tlin.cholesky(A)
     Li = Tlin.matrix_inverse(L)
     PhiTy = Phi.T.dot(y)
     beta = TT.dot(Li, PhiTy)
     alpha = TT.dot(Li.T, beta)
     mu_f = TT.dot(Phi, alpha)
     var_f = (TT.dot(Phi, Li.T)**2).sum(1)[:, None]
     dsp = noise*(var_f+1)
     mu_l = TT.sum(TT.mean(l_F, axis=1))
     sig_l = TT.sum(TT.std(l_F, axis=1))
     mu_w = TT.sum(TT.mean(F, axis=1))
     sig_w = TT.sum(TT.std(F, axis=1))
     hermgauss = np.polynomial.hermite.hermgauss(30)
     herm_x = Ts(hermgauss[0])[None, None, :]
     herm_w = Ts(hermgauss[1]/np.sqrt(np.pi))[None, None, :]
     herm_f = TT.sqrt(2*var_f[:, :, None])*herm_x+mu_f[:, :, None]
     nlk = (0.5*herm_f**2.-y[:, :, None]*herm_f)/dsp[:, :, None]+0.5*(
         TT.log(2*np.pi*dsp[:, :, None])+y[:, :, None]**2/dsp[:, :, None])
     enll = herm_w*nlk
     nlml = 2*TT.log(TT.diagonal(L)).sum()+2*enll.sum()+1./sig2_n*(
         (y**2).sum()-(beta**2).sum())+2*(X.shape[0]-self.M)*a
     penalty = (kl(mu_w, sig_w)*self.M+kl(mu_l, sig_l)*self.S)/(self.S+self.M)
     cost = (nlml+penalty)/X.shape[0]
     grads = TT.grad(cost, params)
     updates = getattr(OPT, algo)(self.params, grads, **algo_params)
     updates = getattr(OPT, 'apply_nesterov_momentum')(updates, momentum=0.9)
     train_inputs = [X, y]
     train_outputs = [cost, alpha, Li]
     self.train_func = Tf(train_inputs, train_outputs,
         givens=[(params, self.params)])
     self.train_iter_func = Tf(train_inputs, train_outputs,
         givens=[(params, self.params)], updates=updates)
     Xs, Li, alpha = TT.dmatrices('Xs', 'Li', 'alpha')
     l_FFs = TT.dot(Xs, l_F)+l_FC
     FFs = TT.concatenate((l_FFs, TT.dot(Xs, F)+FC), 1)
     Phis = TT.concatenate((TT.cos(FFs), TT.sin(FFs)), 1)
     Phis = sig_f*TT.sqrt(2./self.M)*Phis
     mu_pred = TT.dot(Phis, alpha)
     std_pred = (noise*(1+(TT.dot(Phis, Li.T)**2).sum(1)))**0.5
     pred_inputs = [Xs, alpha, Li]
     pred_outputs = [mu_pred, std_pred]
     self.pred_func = Tf(pred_inputs, pred_outputs,
         givens=[(params, self.params)])
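
The Phi built above is a trigonometric (random Fourier style) feature map, Phi = sig_f * sqrt(2/M) * [cos(X F), sin(X F)], after which the GP reduces to Bayesian linear regression on the features. A bare NumPy sketch of that construction, with the frequency matrix F drawn at random purely for illustration:

import numpy as np

rng = np.random.default_rng(0)
N, D, M = 100, 3, 50
X = rng.normal(size=(N, D))
y = rng.normal(size=(N, 1))
F = rng.normal(size=(D, M))              # spectral frequencies, random here for illustration
sig_f, sig2_n = 1.0, 0.1

XF = X @ F
Phi = sig_f * np.sqrt(2.0 / M) * np.hstack([np.cos(XF), np.sin(XF)])  # (N, 2M) features
A = Phi.T @ Phi + sig2_n * np.eye(2 * M)
alpha = np.linalg.solve(A, Phi.T @ y)    # posterior mean of the feature weights
mu_f = Phi @ alpha                       # predictive mean at the training inputs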
Example #8
    def get_model(self,X, Y, x_test):
        '''
        Gaussian Process Regression model.
        Reference: C.E. Rasmussen and C.K.I. Williams, "Gaussian Processes for Machine Learning", MIT Press, 2006

        Args:
            X: tensor matrix, training data
            Y: tensor matrix, training target
            x_test: tensor matrix, testing data
        
        Returns:
            K: prior cov matrix
            Ks: prior joint cov matrix
            Kss: prior cov matrix for testing data
            Posterior Distribution:
                alpha: alpha = inv(K)*(Y-m)
                sW: vector containing diagonal of sqrt(W)
                L: L = chol(sW*K*sW+eye(n))
            y_test_mu: predictive mean
            y_test_var: predictive variance
            fs2: predictive latent variance
        Note: the cov matrix inverse is computed through Cholesky factorization
        https://makarandtapaswi.wordpress.com/2011/07/08/cholesky-decomposition-for-matrix-inversion/
        '''
        # Compute GP prior distribution: mean and covariance matrices (eq 2.13, 2.14)
        K = self.covFunc(X,X,'K') # prior cov
        #m = T.mean(Y)*T.ones_like(Y) # prior mean
        m = self.mean*T.ones_like(Y) # prior mean

        # Compute GP joint prior distribution between training and test (eq 2.18)
        Ks = self.covFunc(X,x_test,'Ks')
        # Note: this is the test-data self-covariance matrix (mode='self_test').
        Kss = self.covFunc(x_test,x_test,'Kss',mode='self_test')

        # Compute posterior distribution with noise: L,alpha,sW,and log_likelihood.
        sn2 = T.exp(2*self.sigma_n) # noise variance of likGauss
        L = sT.cholesky(K/sn2 + T.identity_like(K))
        sl = sn2
        alpha = T.dot(sT.matrix_inverse(L.T), 
                      T.dot(sT.matrix_inverse(L), (Y-m)) ) / sl
        sW = T.ones_like(T.sum(K,axis=1)).reshape((K.shape[0],1)) / T.sqrt(sl)
        log_likelihood = T.sum(-0.5 * (T.dot((Y-m).T, alpha)) - T.sum(T.log(T.diag(L))) - X.shape[0] / 2 * T.log(2.*np.pi*sl))
        
        
        # Compute predictive distribution using the computed posterior distribution.
        fmu = m + T.dot(Ks.T, alpha) # Prediction Mu fs|f, eq 2.25 
        V = T.dot(sT.matrix_inverse(L),T.extra_ops.repeat(sW,x_test.shape[0],axis=1)*Ks)
        fs2 = Kss - (T.sum(V*V,axis=0)).reshape((1,V.shape[1])).T # Prediction Sigma, eq 2.26
        fs2 = T.maximum(fs2,0) # clip negative variances from numerical noise
        #fs2 = T.sum(fs2,axis=1) # in case x has multiple dimensions

        y_test_mu = fmu
        y_test_var = fs2 + sn2

        return K, Ks, Kss, y_test_mu, y_test_var, log_likelihood, L, alpha,V, fs2,sW
Example #9
    def KLD_U(self, m, L_scaled, Kmm,
              KmmInv):  # KL between N(u|m, S) and N(u|0, Kmm), where S = L*L.T (pass in the Cholesky factor)
        M = m.shape[0]
        D = m.shape[1]
        #KmmInv = sT.matrix_inverse(Kmm)

        KL_U = D * (T.sum(KmmInv.T * L_scaled.dot(L_scaled.T)) - M -
                    2.0 * T.sum(T.log(T.diag(L_scaled))) +
                    2.0 * T.sum(T.log(T.diag(sT.cholesky(Kmm)))))
        KL_U += T.sum(T.dot(KmmInv, m) * m)

        return 0.5 * KL_U
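
KLD_U evaluates, per output dimension, KL(N(m_d, S) || N(0, Kmm)) = 0.5 * (tr(Kmm^{-1} S) - M + m_d^T Kmm^{-1} m_d + log|Kmm| - log|S|) with S = L_scaled L_scaled^T, summed over the D columns of m. A small NumPy version that can be used to check the Theano graph (names are illustrative):

import numpy as np

def kl_u(m, L_scaled, Kmm):
    # KL( N(m[:, d], S) || N(0, Kmm) ) summed over the D columns of m,
    # with S = L_scaled @ L_scaled.T
    M, D = m.shape
    S = L_scaled @ L_scaled.T
    Kmm_inv = np.linalg.inv(Kmm)
    logdet_K = np.linalg.slogdet(Kmm)[1]
    logdet_S = 2.0 * np.sum(np.log(np.diag(L_scaled)))
    trace = np.trace(Kmm_inv @ S)
    quad = np.sum((Kmm_inv @ m) * m)          # sum_d m_d^T Kmm^{-1} m_d
    return 0.5 * (D * (trace - M + logdet_K - logdet_S) + quad)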
Example #10
 def compile_theano_funcs(self, opt_algo, opt_params, dropout):
     self.compiled_funcs = {}
     # Compile Train & Optimization Function
     eps = 1e-5
     params = Tt.vector('params')
     X, Y = Tt.matrix('X'), Tt.matrix('Y')
     sig2, F, M, V = self.feature_maps(X, params)
     EPhi = F[-1]
     EPhiPhiT = Tt.dot(EPhi, Tt.transpose(EPhi))
     A = EPhiPhiT + (sig2 + eps) * Tt.identity_like(EPhiPhiT)
     L = Tlin.cholesky(A)
     Linv = Tlin.matrix_inverse(L)
     YPhiT = Tt.dot(Y, Tt.transpose(EPhi))
     beta = Tt.dot(YPhiT, Tt.transpose(Linv))
     alpha = Tt.dot(beta, Linv)
     mu_F = Tt.dot(alpha, EPhi)
     GOF = .5 / sig2 * Tt.sum(Tt.sum(Tt.dot(Y, (Y - mu_F).T)))
     REG = Tt.sum(Tt.log(
         Tt.diagonal(L))) + (self.N - self.D[-2]) / 2 * Tt.log(sig2)
     REG *= self.D[-1]
     KL = 0
     for h in range(self.H):
         KL += Tt.sum(Tt.sum(M[h]**2) + Tt.sum(V[h] - Tt.log(V[h] + eps)))
         KL -= self.D[h + 1] * self.D[h + 2] // 2
     obj = debug('obj', GOF + REG + KL)
     self.compiled_funcs['debug'] = Tf([X, Y], [obj],
                                       givens=[(params, self.params)])
     grads = Tt.grad(obj, params)
     updates = {self.params: grads}
     updates = getattr(Optimizer, opt_algo)(updates, **opt_params)
     updates = getattr(Optimizer, 'nesterov')(updates, momentum=0.9)
     train_inputs = [X, Y]
     train_outputs = [obj, alpha, Linv, mu_F]
     self.compiled_funcs['opt'] = Tf(train_inputs,
                                     train_outputs,
                                     givens=[(params, self.params)],
                                     updates=updates)
     self.compiled_funcs['train'] = Tf(train_inputs,
                                       train_outputs,
                                       givens=[(params, self.params)])
     # Compile Predict Function
     Linv, alpha = Tt.matrix('Linv'), Tt.matrix('alpha')
     Xs = Tt.matrix('Xs')
     sig2, Fs, _, _ = self.feature_maps(Xs, params)
     EPhis = Fs[-1]
     mu_Fs = Tt.dot(alpha, EPhis)
     std_Fs = ((sig2 * (1 + (Tt.dot(Linv, EPhis)**2).sum(0)))**0.5)[:, None]
     pred_inputs = [Xs, alpha, Linv]
     pred_outputs = [mu_Fs, std_Fs]
     self.compiled_funcs['pred'] = Tf(pred_inputs,
                                      pred_outputs,
                                      givens=[(params, self.params)])
Example #11
    def _build_graph(self):
        """Sets up the gaussian process's tensor variables."""
        X = self.X
        Y = self.Y
        x = self.x
        reg = self.reg

        if self._normalize_y:
            Y_mean = T.mean(Y, axis=0)
            Y_variance = T.std(Y, axis=0)
            Y = (Y - Y_mean) / Y_variance

        # Kernel functions.
        K_ss = self._kernel(x, x)
        K_s = self._kernel(x, X)
        K = self._kernel(X, X) + self._sigma_n**2 * T.eye(X.shape[0])

        # Guarantee positive definiteness.
        K = 0.5 * (K + K.T) + reg * T.eye(K.shape[0])

        # Mean and variance functions.
        K_inv = sT.matrix_inverse(K)
        mu = T.dot(K_s, T.dot(K_inv, self.Y))  # Non-normalized Y for scale.
        var = K_ss - T.dot(K_s, T.dot(K_inv, K_s.T))

        # Compute the standard deviation.
        L = sT.cholesky(K)
        L_k = T.slinalg.solve_lower_triangular(L, K_s.T)
        std = T.sqrt(T.diag(K_ss) - T.sum(L_k**2, axis=0)).reshape((-1, 1))

        # Compute the log likelihood.
        log_likelihood_dims = -0.5 * T.dot(Y.T, T.dot(K_inv, Y)).sum(axis=0)
        log_likelihood_dims -= T.log(T.diag(L)).sum()
        log_likelihood_dims -= L.shape[0] / 2 * T.log(2 * np.pi)
        log_likelihood = log_likelihood_dims.sum(axis=-1)

        self._mu = mu
        self._var = var
        self._std = std
        self._log_likelihood = log_likelihood
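
The explicit K_inv above could also be avoided by reusing the Cholesky factor already computed for the standard deviation; a hedged NumPy/SciPy sketch of that variant (function and argument names are illustrative, not part of this project):

import numpy as np
from scipy.linalg import cho_factor, cho_solve

def gp_mean_cov(K, K_s, K_ss, Y, reg=1e-8):
    # symmetrize and add jitter, as in the graph above, then solve with the
    # Cholesky factor instead of forming an explicit inverse of K
    K = 0.5 * (K + K.T) + reg * np.eye(K.shape[0])
    c = cho_factor(K, lower=True)
    mu = K_s @ cho_solve(c, Y)                # K_s K^{-1} Y
    var = K_ss - K_s @ cho_solve(c, K_s.T)    # K_ss - K_s K^{-1} K_s^T
    return mu, var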
Example #12
    def __init__(self, mu, sigma):
        """Constructor.

        Parameters
        ----------
        * `mu` [1d array]:
            The means.

        * `sigma` [2d array]:
            The covariance matrix.
        """
        super(MultivariateNormal, self).__init__(mu=mu, sigma=sigma)
        # XXX: The SDP-ness of sigma should be checked upon changes

        # ndim
        self.ndim_ = self.mu.shape[0]
        self._make(self.ndim_, "ndim_func_", args=[])

        # pdf
        L = linalg.cholesky(self.sigma)
        sigma_det = linalg.det(self.sigma)  # XXX: compute from L instead
        sigma_inv = linalg.matrix_inverse(self.sigma)  # XXX: idem

        self.pdf_ = (
            (1. / T.sqrt((2. * np.pi) ** self.ndim_ * T.abs_(sigma_det))) *
            T.exp(-0.5 * T.sum(T.mul(T.dot(self.X - self.mu,
                                           sigma_inv),
                                     self.X - self.mu),
                               axis=1))).ravel()
        self._make(self.pdf_, "pdf")

        # -log pdf
        self.nll_ = -T.log(self.pdf_)  # XXX: for sure this can be better
        self._make(self.nll_, "nll")

        # self.rvs_
        self._make(T.dot(L, self.X.T).T + self.mu, "rvs_func_")
Example #13
 def KLD_U(self, m, L_scaled, Kmm, KmmInv):  # KL between N(u|m, S) and N(u|0, Kmm), where S = L*L.T (pass in the Cholesky factor)
     M = m.shape[0]
     D = m.shape[1]
     #KmmInv = sT.matrix_inverse(Kmm)
     
     KL_U = D * (T.sum(KmmInv.T * L_scaled.dot(L_scaled.T)) - M - 2.0*T.sum(T.log(T.diag(L_scaled))) + 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm)))))
     KL_U += T.sum(T.dot(KmmInv,m)*m) 
     
     return 0.5*KL_U
Example #14
    def __init__(self,
                 params,
                 sx2=1,
                 linear_model=False,
                 samples=20,
                 use_hat=False):
        ker, self.samples, self.params, self.KmmInv = kernel(
        ), samples, params, {}
        self.use_hat = use_hat

        model_file_name = 'model' + ('_hat' if use_hat else '') + (
            '_linear' if linear_model else '') + '.save'

        try:
            print 'Trying to load model...'
            with open(model_file_name, 'rb') as file_handle:
                obj = cPickle.load(file_handle)
                self.f, self.g, self.f_Kmm, self.f_KmmInv, self.dKmm_d = obj
                self.update_KmmInv_cache()
                print 'Loaded!'
            return
        except:
            print 'Failed. Creating a new model...'

        Y, Z, m, ls, mu, lL, eps_MK, eps_NQ, eps_NK, KmmInv = T.dmatrices(
            'Y', 'Z', 'm', 'ls', 'mu', 'lL', 'eps_MK', 'eps_NQ', 'eps_NK',
            'KmmInv')
        lhyp = T.dvector('lhyp')
        (M, K), N, Q = mu.shape, m.shape[0], Z.shape[1]
        s, sl2, sf2, l = T.exp(ls), T.exp(lhyp[0]), T.exp(lhyp[1]), T.exp(
            lhyp[2:2 + Q])
        L = T.tril(lL - T.diag(T.diag(lL)) + T.diag(T.exp(T.diag(lL))))

        print 'Setting up cache...'
        Kmm = ker.RBF(sf2, l, Z) if not linear_model else ker.LIN(sl2, Z)
        KmmInv_cache = sT.matrix_inverse(Kmm)
        self.f_Kmm = theano.function([Z, lhyp], Kmm, name='Kmm')
        self.f_KmmInv = theano.function([Z, lhyp],
                                        KmmInv_cache,
                                        name='KmmInv_cache')
        self.update_KmmInv_cache()
        self.dKmm_d = {
            'Z':
            theano.function([Z, lhyp],
                            T.jacobian(Kmm.flatten(), Z),
                            name='dKmm_dZ'),
            'lhyp':
            theano.function([Z, lhyp],
                            T.jacobian(Kmm.flatten(), lhyp),
                            name='dKmm_dlhyp')
        }

        print 'Setting up model...'
        if not self.use_hat:
            mu_scaled, L_scaled = sf2**0.5 * mu, sf2**0.5 * L
            X = m + s * eps_NQ
            U = mu_scaled + L_scaled.dot(eps_MK)
            Kmn = ker.RBF(sf2, l, Z, X) if not linear_model else ker.LIN(
                sl2, Z, X)
            Knn = ker.RBFnn(sf2, l, X) if not linear_model else ker.LINnn(
                sl2, X)
            A = KmmInv.dot(Kmn)
            B = Knn - T.sum(Kmn * KmmInv.dot(Kmn), 0)
            F = A.T.dot(U) + T.maximum(B, 1e-16)[:, None]**0.5 * eps_NK
            F = T.concatenate((T.zeros((N, 1)), F), axis=1)
            S = T.nnet.softmax(F)
            LS = T.sum(T.log(T.maximum(T.sum(Y * S, 1), 1e-16)))
            if not linear_model:
                KL_U = -0.5 * (T.sum(KmmInv.T * T.sum(
                    mu_scaled[:, None, :] * mu_scaled[None, :, :], 2)) + K *
                               (T.sum(KmmInv.T * L_scaled.dot(L_scaled.T)) -
                                M - 2.0 * T.sum(T.log(T.diag(L_scaled))) +
                                2.0 * T.sum(T.log(T.diag(sT.cholesky(Kmm))))))
            else:
                KL_U = 0
            #KL_U = -0.5 * T.sum(T.sum(mu_scaled * KmmInv.dot(mu_scaled), 0) + T.sum(KmmInv * L_scaled.dot(L_scaled.T)) - M
            #                    - 2.0*T.sum(T.log(T.diag(L_scaled))) + 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm))))) if not linear_model else 0
        else:
            # mu_scaled, L_scaled = mu / sf2**0.5, L / sf2**0.5
            mu_scaled, L_scaled = mu / sf2, L / sf2
            X = m + s * eps_NQ
            U = mu_scaled + L_scaled.dot(eps_MK)
            Kmn = ker.RBF(sf2, l, Z, X) if not linear_model else ker.LIN(
                sl2, Z, X)
            Knn = ker.RBFnn(sf2, l, X) if not linear_model else ker.LINnn(
                sl2, X)
            B = Knn - T.sum(Kmn * KmmInv.dot(Kmn), 0)
            F = Kmn.T.dot(U) + T.maximum(B, 1e-16)[:, None]**0.5 * eps_NK
            F = T.concatenate((T.zeros((N, 1)), F), axis=1)
            S = T.nnet.softmax(F)
            LS = T.sum(T.log(T.maximum(T.sum(Y * S, 1), 1e-16)))
            if not linear_model:
                KL_U = -0.5 * (T.sum(Kmm.T * T.sum(
                    mu_scaled[:, None, :] * mu_scaled[None, :, :], 2)) + K *
                               (T.sum(Kmm.T * L_scaled.dot(L_scaled.T)) - M -
                                2.0 * T.sum(T.log(T.diag(L_scaled))) -
                                2.0 * T.sum(T.log(T.diag(sT.cholesky(Kmm))))))
            else:
                KL_U = 0

        KL_X_all = -0.5 * T.sum(
            (m**2.0 + s**2.0) / sx2 - 1.0 - 2.0 * ls + T.log(sx2), 1)
        KL_X = T.sum(KL_X_all)

        print 'Compiling...'
        inputs = {
            'Y': Y,
            'Z': Z,
            'm': m,
            'ls': ls,
            'mu': mu,
            'lL': lL,
            'lhyp': lhyp,
            'KmmInv': KmmInv,
            'eps_MK': eps_MK,
            'eps_NQ': eps_NQ,
            'eps_NK': eps_NK
        }
        z = 0.0 * sum([
            T.sum(v) for v in inputs.values()
        ])  # solve a bug with derivative wrt inputs not in the graph
        f = zip(['X', 'U', 'S', 'LS', 'KL_U', 'KL_X', 'KL_X_all'],
                [X, U, S, LS, KL_U, KL_X, KL_X_all])
        self.f = {
            n: theano.function(inputs.values(),
                               f + z,
                               name=n,
                               on_unused_input='ignore')
            for n, f in f
        }
        g = zip(['LS', 'KL_U', 'KL_X'], [LS, KL_U, KL_X])
        wrt = {
            'Z': Z,
            'm': m,
            'ls': ls,
            'mu': mu,
            'lL': lL,
            'lhyp': lhyp,
            'KmmInv': KmmInv
        }
        self.g = {
            vn: {
                gn: theano.function(inputs.values(),
                                    T.grad(gv + z, vv),
                                    name='d' + gn + '_d' + vn,
                                    on_unused_input='ignore')
                for gn, gv in g
            }
            for vn, vv in wrt.iteritems()
        }

        with open(model_file_name, 'wb') as file_handle:
            print 'Saving model...'
            sys.setrecursionlimit(2000)
            cPickle.dump(
                [self.f, self.g, self.f_Kmm, self.f_KmmInv, self.dKmm_d],
                file_handle,
                protocol=cPickle.HIGHEST_PROTOCOL)
Example #15
    def __init__(self, params, sx2 = 1, linear_model = False, samples = 20, use_hat = False):
        ker, self.samples, self.params, self.KmmInv  = kernel(), samples, params, {}
        self.use_hat = use_hat

        model_file_name = 'model' + ('_hat' if use_hat else '') + ('_linear' if linear_model else '') + '.save'

        try:
            print 'Trying to load model...'
            with open(model_file_name, 'rb') as file_handle:
                obj = cPickle.load(file_handle)
                self.f, self.g, self.f_Kmm, self.f_KmmInv, self.dKmm_d = obj
                self.update_KmmInv_cache()
                print 'Loaded!'
            return
        except:
            print 'Failed. Creating a new model...'

        Y, Z, m, ls, mu, lL, eps_MK, eps_NQ, eps_NK, KmmInv = T.dmatrices('Y', 'Z', 'm', 'ls', 'mu', 
            'lL', 'eps_MK', 'eps_NQ', 'eps_NK', 'KmmInv')
        lhyp = T.dvector('lhyp')
        (M, K), N, Q = mu.shape, m.shape[0], Z.shape[1]
        s, sl2, sf2, l = T.exp(ls), T.exp(lhyp[0]), T.exp(lhyp[1]), T.exp(lhyp[2:2+Q])
        L = T.tril(lL - T.diag(T.diag(lL)) + T.diag(T.exp(T.diag(lL))))
        
        print 'Setting up cache...'
        Kmm = ker.RBF(sf2, l, Z) if not linear_model else ker.LIN(sl2, Z)
        KmmInv_cache = sT.matrix_inverse(Kmm)
        self.f_Kmm = theano.function([Z, lhyp], Kmm, name='Kmm')
        self.f_KmmInv = theano.function([Z, lhyp], KmmInv_cache, name='KmmInv_cache')
        self.update_KmmInv_cache()
        self.dKmm_d = {'Z': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), Z), name='dKmm_dZ'),
                       'lhyp': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), lhyp), name='dKmm_dlhyp')}

        print 'Setting up model...'
        if not self.use_hat:
            mu_scaled, L_scaled = sf2**0.5 * mu, sf2**0.5 * L
            X = m + s * eps_NQ
            U = mu_scaled + L_scaled.dot(eps_MK)
            Kmn = ker.RBF(sf2, l, Z, X) if not linear_model else ker.LIN(sl2, Z, X)
            Knn = ker.RBFnn(sf2, l, X) if not linear_model else ker.LINnn(sl2, X)
            A = KmmInv.dot(Kmn)
            B = Knn - T.sum(Kmn * KmmInv.dot(Kmn), 0)
            F = A.T.dot(U) + T.maximum(B, 1e-16)[:,None]**0.5 * eps_NK
            F = T.concatenate((T.zeros((N,1)), F), axis=1)
            S = T.nnet.softmax(F)
            LS = T.sum(T.log(T.maximum(T.sum(Y * S, 1), 1e-16)))
            if not linear_model:
                KL_U = -0.5 * (T.sum(KmmInv.T * T.sum(mu_scaled[:,None,:]*mu_scaled[None,:,:], 2))
                        + K * (T.sum(KmmInv.T * L_scaled.dot(L_scaled.T)) - M - 2.0*T.sum(T.log(T.diag(L_scaled)))
                               + 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm))))))
            else:
                KL_U = 0
            #KL_U = -0.5 * T.sum(T.sum(mu_scaled * KmmInv.dot(mu_scaled), 0) + T.sum(KmmInv * L_scaled.dot(L_scaled.T)) - M
            #                    - 2.0*T.sum(T.log(T.diag(L_scaled))) + 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm))))) if not linear_model else 0
        else:
            # mu_scaled, L_scaled = mu / sf2**0.5, L / sf2**0.5
            mu_scaled, L_scaled = mu / sf2, L / sf2
            X = m + s * eps_NQ
            U = mu_scaled + L_scaled.dot(eps_MK)
            Kmn = ker.RBF(sf2, l, Z, X) if not linear_model else ker.LIN(sl2, Z, X)
            Knn = ker.RBFnn(sf2, l, X) if not linear_model else ker.LINnn(sl2, X)
            B = Knn - T.sum(Kmn * KmmInv.dot(Kmn), 0)
            F = Kmn.T.dot(U) + T.maximum(B, 1e-16)[:,None]**0.5 * eps_NK
            F = T.concatenate((T.zeros((N,1)), F), axis=1)
            S = T.nnet.softmax(F)
            LS = T.sum(T.log(T.maximum(T.sum(Y * S, 1), 1e-16)))
            if not linear_model:
                KL_U = -0.5 * (T.sum(Kmm.T * T.sum(mu_scaled[:,None,:]*mu_scaled[None,:,:], 2))
                        + K * (T.sum(Kmm.T * L_scaled.dot(L_scaled.T)) - M - 2.0*T.sum(T.log(T.diag(L_scaled)))
                               - 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm))))))
            else:
                KL_U = 0

        KL_X_all = -0.5 * T.sum((m**2.0 + s**2.0)/sx2 - 1.0 - 2.0*ls + T.log(sx2), 1)
        KL_X = T.sum(KL_X_all)

        print 'Compiling...'
        inputs = {'Y': Y, 'Z': Z, 'm': m, 'ls': ls, 'mu': mu, 'lL': lL, 'lhyp': lhyp, 'KmmInv': KmmInv, 
            'eps_MK': eps_MK, 'eps_NQ': eps_NQ, 'eps_NK': eps_NK}
        z = 0.0*sum([T.sum(v) for v in inputs.values()]) # solve a bug with derivative wrt inputs not in the graph
        f = zip(['X', 'U', 'S', 'LS', 'KL_U', 'KL_X', 'KL_X_all'], [X, U, S, LS, KL_U, KL_X, KL_X_all])
        self.f = {n: theano.function(inputs.values(), f+z, name=n, on_unused_input='ignore') for n,f in f}
        g = zip(['LS', 'KL_U', 'KL_X'], [LS, KL_U, KL_X])
        wrt = {'Z': Z, 'm': m, 'ls': ls, 'mu': mu, 'lL': lL, 'lhyp': lhyp, 'KmmInv': KmmInv}
        self.g = {vn: {gn: theano.function(inputs.values(), T.grad(gv+z, vv), name='d'+gn+'_d'+vn, 
            on_unused_input='ignore') for gn,gv in g} for vn, vv in wrt.iteritems()}

        with open(model_file_name, 'wb') as file_handle:
            print 'Saving model...'
            sys.setrecursionlimit(2000)
            cPickle.dump([self.f, self.g, self.f_Kmm, self.f_KmmInv, self.dKmm_d], file_handle, protocol=cPickle.HIGHEST_PROTOCOL)
Example #16
    def __init__(self, params, correct, samples=20, batch_size=None):
        ker = kernel()
        self.samples = samples
        self.params =  params
        self.batch_size=batch_size
        
        # file in which the model is saved
        model_file_name = 'model2' + '.save'
        # load a previously built model if one exists
        try:
            print ('Trying to load model...')
            with open(model_file_name, 'rb') as file_handle:
                obj = pickle.load(file_handle)
                self.f, self.g,self.ES_US= obj
                print ('Loaded!')
            return
        except:
            print ('Failed. Creating a new model...')
        
        X,Y,X_test,m,S_b,Z,eps_NQ,eps_M=\
        T.dmatrices('X','Y','X_test','m','S_b','Z','eps_NQ','eps_M')

        mu,Sigma=T.dmatrices('mu','Sigma')

        lhyp = T.dvector('lhyp')
        ls=T.dvector('ls')
        
        N,Q= m.shape
        M=Z.shape[0]
        D=X.shape[1]
        
        # constrain the variables to be positive
        beta = T.exp(ls[0])
        #beta=T.exp(lhyp[0])
        sf2, l = T.exp(lhyp[0]), T.exp(lhyp[1:1+Q])
        
        S=T.exp(S_b)

        
        Xtilda = m + S * eps_NQ

        print ('Setting up cache...')
        
        Kmm = ker.RBF(sf2, l, Z)
        KmmInv = sT.matrix_inverse(Kmm) 
        #KmmDet=theano.sandbox.linalg.det(Kmm)
        
        from theano.tensor.shared_randomstreams import RandomStreams
        srng = RandomStreams(seed=234)
        rv_u = srng.normal((2,N,Q))
        rv_s = srng.normal((2,N,Q)) # different random draws are needed for the mean and the variance, so they are defined separately
        
        xx_s=m.reshape([1,N,Q])+S.reshape([1,N,Q])*rv_s
        xxx_s=xx_s.reshape([2,N,1,Q])
        zz=Z.reshape([1,1,M,Q])
        rbf_u=T.exp(-T.sum(((xxx_s-zz)**2)/(2*l.reshape([1,1,1,Q])),-1))*sf2#N×M
        A=Kmm+beta*T.sum(T.mean(rbf_u.reshape([2,M,1,N])*rbf_u.reshape([2,1,M,N]),0),-1)
        Ainv=sT.matrix_inverse(A)
        Sigma_f=T.dot(Kmm,T.dot(Ainv,Kmm))
                     
        xx=m.reshape([1,N,Q])+S.reshape([1,N,Q])*rv_u
        xxx=xx.reshape([2,N,1,Q])
        rbf=T.mean(T.exp(-T.sum(((xxx-zz)**2)/(2*l.reshape([1,1,1,Q])),-1)),0)#N×M
        RHS=T.sum(rbf.reshape([M,1,N])*X.reshape([1,D,N]),2)

        mu_f=beta*T.dot(Kmm,T.dot(Ainv,RHS)) 
        
        self.ES_US = theano.function([m,S_b,Z,X,lhyp,ls], [mu_f,Sigma_f],on_unused_input='ignore')
        
        rv_u_d = srng.normal((N,Q))
        rv_s_d = srng.normal((N,Q)) # different random draws are needed for the mean and the variance, so they are defined separately
        Xtilda_u = m + S * rv_u_d
        Xtilda_s = m + S * rv_s_d
        Kmn_u = ker.RBF(sf2, l, Z, Xtilda_u)
        Kmn_s = ker.RBF(sf2, l, Z, Xtilda_s)
        
        
        print ('Modeling...')
        
        Kmn = ker.RBF(sf2,l,Z,Xtilda)
        Knn = ker.RBF(sf2,l,Xtilda,Xtilda)
        
        Ktilda=Knn-T.dot(Kmn.T,T.dot(KmmInv,Kmn))
        
        Kinterval=T.dot(KmmInv,Kmn)
        

        # scale transformation
        Sigma_L=sT.cholesky(Sigma)
        U = mu+Sigma_L.dot(eps_M)
        
        mean_U=T.dot(Kinterval.T,U)
        Covariance = beta       
        
        LL = (self.log_mvn(X, mean_U, Covariance) - 0.5*beta*T.sum((T.eye(N)*Ktilda)))*correct      
        KL_X = -self.KLD_X(m,S)*correct
        KL_U = -self.KLD_U(mu, Sigma_L, Kmm,KmmInv)
        
        print ('Compiling model ...')        


        inputs = {'X': X, 'Z': Z, 'm': m, 'S_b': S_b, 'mu': mu, 'Sigma': Sigma, 'lhyp': lhyp, 'ls': ls, 
            'eps_M': eps_M, 'eps_NQ': eps_NQ}
        
        z = 0.0*sum([T.sum(v) for v in inputs.values()]) # solve a bug with derivative wrt inputs not in the graph
        
        self.f = {n: theano.function(list(inputs.values()), f+z, name=n, on_unused_input='ignore')\
                  for n,f in zip(['X', 'U', 'LL', 'KL_U', 'KL_X'], [X, U, LL, KL_U, KL_X])}
        
        
        wrt = {'Z': Z, 'm': m, 'S_b': S_b, 'lhyp': lhyp, 'ls': ls}
        self.g = {vn: {gn: theano.function(list(inputs.values()), T.grad(gv+z, vv), name='d'+gn+'_d'+vn, 
            on_unused_input='ignore') for gn,gv in zip(['LL', 'KL_U', 'KL_X'], [LL, KL_U, KL_X])} for vn, vv in wrt.items()}

        with open(model_file_name, 'wb') as file_handle:
            print ('Saving model...')
            sys.setrecursionlimit(2000)
            pickle.dump([self.f, self.g,self.ES_US], file_handle, protocol=pickle.HIGHEST_PROTOCOL)
Example #17
def s_variance(K, y, var_y, prior_var, K_new, var_min):
    rK = psd(prior_var * K + var_y * TT.eye(y.shape[0]))
    L = cholesky(rK)
    v = dots(matrix_inverse(L), prior_var * K_new)
    var_x = TT.maximum(prior_var - (v ** 2).sum(axis=0), var_min)
    return var_x
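
s_variance computes the GP posterior variance prior_var - v^T v with v = L^{-1} (prior_var * K_new); an equivalent NumPy sketch that uses a triangular solve in place of the explicit inverse of L (assuming K is the square training kernel; names are illustrative):

import numpy as np
from scipy.linalg import cholesky, solve_triangular

def s_variance_np(K, var_y, prior_var, K_new, var_min):
    rK = prior_var * K + var_y * np.eye(K.shape[0])
    L = cholesky(rK, lower=True)                            # rK = L @ L.T
    v = solve_triangular(L, prior_var * K_new, lower=True)
    return np.maximum(prior_var - (v ** 2).sum(axis=0), var_min)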
Example #18
    def __init__(self, params, correct, samples=20, batch_size=None):
        ker = kernel()
        self.samples = samples
        self.params = params
        self.batch_size = batch_size

        # file in which the model is saved
        model_file_name = 'model2' + '.save'
        # load a previously built model if one exists
        try:
            print('Trying to load model...')
            with open(model_file_name, 'rb') as file_handle:
                obj = pickle.load(file_handle)
                self.f, self.g, self.ES_US = obj
                print('Loaded!')
            return
        except:
            print('Failed. Creating a new model...')

        X,Y,X_test,m,S_b,Z,eps_NQ,eps_M=\
        T.dmatrices('X','Y','X_test','m','S_b','Z','eps_NQ','eps_M')

        mu, Sigma = T.dmatrices('mu', 'Sigma')

        lhyp = T.dvector('lhyp')
        ls = T.dvector('ls')

        N, Q = m.shape
        M = Z.shape[0]
        D = X.shape[1]

        # constrain the variables to be positive
        beta = T.exp(ls[0])
        #beta=T.exp(lhyp[0])
        sf2, l = T.exp(lhyp[0]), T.exp(lhyp[1:1 + Q])

        S = T.exp(S_b)

        Xtilda = m + S * eps_NQ

        print('Setting up cache...')

        Kmm = ker.RBF(sf2, l, Z)
        KmmInv = sT.matrix_inverse(Kmm)
        #KmmDet=theano.sandbox.linalg.det(Kmm)

        from theano.tensor.shared_randomstreams import RandomStreams
        srng = RandomStreams(seed=234)
        rv_u = srng.normal((2, N, Q))
        rv_s = srng.normal((2, N, Q))  # different random draws are needed for the mean and the variance, so they are defined separately

        xx_s = m.reshape([1, N, Q]) + S.reshape([1, N, Q]) * rv_s
        xxx_s = xx_s.reshape([2, N, 1, Q])
        zz = Z.reshape([1, 1, M, Q])
        rbf_u = T.exp(-T.sum(((xxx_s - zz)**2) /
                             (2 * l.reshape([1, 1, 1, Q])), -1)) * sf2  #N×M
        A = Kmm + beta * T.sum(
            T.mean(
                rbf_u.reshape([2, M, 1, N]) * rbf_u.reshape([2, 1, M, N]), 0),
            -1)
        Ainv = sT.matrix_inverse(A)
        Sigma_f = T.dot(Kmm, T.dot(Ainv, Kmm))

        xx = m.reshape([1, N, Q]) + S.reshape([1, N, Q]) * rv_u
        xxx = xx.reshape([2, N, 1, Q])
        rbf = T.mean(
            T.exp(-T.sum(((xxx - zz)**2) / (2 * l.reshape([1, 1, 1, Q])), -1)),
            0)  #N×M
        RHS = T.sum(rbf.reshape([M, 1, N]) * X.reshape([1, D, N]), 2)

        mu_f = beta * T.dot(Kmm, T.dot(Ainv, RHS))

        self.ES_US = theano.function([m, S_b, Z, X, lhyp, ls], [mu_f, Sigma_f],
                                     on_unused_input='ignore')

        rv_u_d = srng.normal((N, Q))
        rv_s_d = srng.normal((N, Q))  # different random draws are needed for the mean and the variance, so they are defined separately
        Xtilda_u = m + S * rv_u_d
        Xtilda_s = m + S * rv_s_d
        Kmn_u = ker.RBF(sf2, l, Z, Xtilda_u)
        Kmn_s = ker.RBF(sf2, l, Z, Xtilda_s)

        print('Modeling...')

        Kmn = ker.RBF(sf2, l, Z, Xtilda)
        Knn = ker.RBF(sf2, l, Xtilda, Xtilda)

        Ktilda = Knn - T.dot(Kmn.T, T.dot(KmmInv, Kmn))

        Kinterval = T.dot(KmmInv, Kmn)

        # scale transformation
        Sigma_L = sT.cholesky(Sigma)
        U = mu + Sigma_L.dot(eps_M)

        mean_U = T.dot(Kinterval.T, U)
        Covariance = beta

        LL = (self.log_mvn(X, mean_U, Covariance) - 0.5 * beta * T.sum(
            (T.eye(N) * Ktilda))) * correct
        KL_X = -self.KLD_X(m, S) * correct
        KL_U = -self.KLD_U(mu, Sigma_L, Kmm, KmmInv)

        print('Compiling model ...')

        inputs = {
            'X': X,
            'Z': Z,
            'm': m,
            'S_b': S_b,
            'mu': mu,
            'Sigma': Sigma,
            'lhyp': lhyp,
            'ls': ls,
            'eps_M': eps_M,
            'eps_NQ': eps_NQ
        }

        z = 0.0 * sum([
            T.sum(v) for v in inputs.values()
        ])  # solve a bug with derivative wrt inputs not in the graph

        self.f = {n: theano.function(list(inputs.values()), f+z, name=n, on_unused_input='ignore')\
                  for n,f in zip(['X', 'U', 'LL', 'KL_U', 'KL_X'], [X, U, LL, KL_U, KL_X])}

        wrt = {'Z': Z, 'm': m, 'S_b': S_b, 'lhyp': lhyp, 'ls': ls}
        self.g = {
            vn: {
                gn: theano.function(list(inputs.values()),
                                    T.grad(gv + z, vv),
                                    name='d' + gn + '_d' + vn,
                                    on_unused_input='ignore')
                for gn, gv in zip(['LL', 'KL_U', 'KL_X'], [LL, KL_U, KL_X])
            }
            for vn, vv in wrt.items()
        }

        with open(model_file_name, 'wb') as file_handle:
            print('Saving model...')
            sys.setrecursionlimit(2000)
            pickle.dump([self.f, self.g, self.ES_US],
                        file_handle,
                        protocol=pickle.HIGHEST_PROTOCOL)