Example no. 1
    def get_model(self, X, Y, x_test):
        '''
        Gaussian Process Regression model.
        Reference: C.E. Rasmussen, "Gaussian Processes for Machine Learning", MIT Press 2006

        Args:
            X: tensor matrix, training data
            Y: tensor matrix, training target
            x_test: tensor matrix, testing data
        
        Returns:
            K: prior cov matrix
            Ks: prior joint cov matrix
            Kss: prior cov matrix for testing data
            Posterior Distribution:
                alpha: alpha = inv(K)*(mu-m)
                sW: vector containing diagonal of sqrt(W)
                L: L = chol(sW*K*sW+eye(n))
            y_test_mu: predictive mean
            y_test_var: predictive variance
            fs2: predictive latent variance
        Note: the cov matrix inverse is computed through Cholesky factorization
        https://makarandtapaswi.wordpress.com/2011/07/08/cholesky-decomposition-for-matrix-inversion/
        '''
        # Compute GP prior distribution: mean and covariance matrices (eq 2.13, 2.14)
        K = self.covFunc(X, X, 'K')  # prior cov
        #m = T.mean(Y)*T.ones_like(Y) # prior mean
        m = self.mean * T.ones_like(Y)  # prior mean

        # Compute GP joint prior distribution between training and test (eq 2.18)
        Ks = self.covFunc(X, x_test, 'Ks')
        # Note: this is the self-covariance matrix of the test inputs.
        Kss = self.covFunc(x_test, x_test, 'Kss', mode='self_test')

        # Compute posterior distribution with noise: L,alpha,sW,and log_likelihood.
        sn2 = T.exp(2 * self.sigma_n)  # noise variance of likGauss
        L = sT.cholesky(K / sn2 + T.identity_like(K))
        sl = sn2
        alpha = T.dot(sT.matrix_inverse(L.T),
                      T.dot(sT.matrix_inverse(L), (Y - m))) / sl
        sW = T.ones_like(T.sum(K, axis=1)).reshape(
            (K.shape[0], 1)) / T.sqrt(sl)
        log_likelihood = T.sum(-0.5 * (T.dot((Y - m).T, alpha)) -
                               T.sum(T.log(T.diag(L))) -
                               X.shape[0] / 2 * T.log(2. * np.pi * sl))

        # Compute predictive distribution using the computed posterior distribution.
        fmu = m + T.dot(Ks.T, alpha)  # Prediction Mu fs|f, eq 2.25
        V = T.dot(sT.matrix_inverse(L),
                  T.extra_ops.repeat(sW, x_test.shape[0], axis=1) * Ks)
        fs2 = Kss - (T.sum(V * V, axis=0)).reshape(
            (1, V.shape[1])).T  # Prediction Sigma, eq 2.26
        fs2 = T.maximum(fs2, 0)  # remove negative variance noise
        #fs2 = T.sum(fs2,axis=1) # in case x has multiple dimensions

        y_test_mu = fmu
        y_test_var = fs2 + sn2

        return K, Ks, Kss, y_test_mu, y_test_var, log_likelihood, L, alpha, V, fs2, sW
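The docstring's note about computing the covariance inverse through a Cholesky factorization can be illustrated with a short NumPy sketch (purely illustrative: the toy kernel and the names below are not part of the snippet above):

    import numpy as np

    # Toy data: five 1-d training inputs and targets.
    rng = np.random.default_rng(0)
    X = rng.normal(size=(5, 1))
    y = rng.normal(size=(5, 1))
    K = np.exp(-0.5 * (X - X.T) ** 2)   # toy RBF prior covariance
    sn2 = 0.1                           # noise variance

    # Solve (K + sn2*I) alpha = y from the Cholesky factor instead of
    # forming the inverse of the full matrix.
    L = np.linalg.cholesky(K + sn2 * np.eye(5))           # K + sn2*I = L L^T
    alpha = np.linalg.solve(L.T, np.linalg.solve(L, y))   # (K + sn2*I)^-1 y
    # The log-determinant needed by the log likelihood also comes from L:
    logdet = 2.0 * np.sum(np.log(np.diag(L)))

Example no. 2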
 def get_opt_A(self, sn_trf, EPhiTPhi, XT_EPhi, K_MM):
     cholSigInv = sT.cholesky(EPhiTPhi + sn_trf * T.identity_like(K_MM))
     cholK_MM = sT.cholesky(K_MM + 1e-6 * T.identity_like(K_MM))
     invCholSigInv = sT.matrix_inverse(cholSigInv)
     invCholK_MM = sT.matrix_inverse(cholK_MM)
     InvSig = invCholSigInv.T.dot(invCholSigInv)
     InvK_MM = invCholK_MM.T.dot(invCholK_MM)
     Sig_EPhiT_X = InvSig.dot(XT_EPhi.T)
     return Sig_EPhiT_X, cholSigInv, cholK_MM, InvK_MM
Example no. 3
    def get_model(self,X, Y, x_test):
        '''
        Gaussian Process Regression model.
        Reference: C.E. Rasmussen, "Gaussian Processes for Machine Learning", MIT Press 2006

        Args:
            X: tensor matrix, training data
            Y: tensor matrix, training target
            x_test: tensor matrix, testing data
        
        Returns:
            K: prior cov matrix
            Ks: prior joint cov matrix
            Kss: prior cov matrix for testing data
            Posterior Distribution:
                alpha: alpha = inv(K)*(mu-m)
                sW: vector containing diagonal of sqrt(W)
                L: L = chol(sW*K*sW+eye(n))
            y_test_mu: predictive mean
            y_test_var: predictive variance
            fs2: predictive latent variance
        Note: the cov matrix inverse is computed through Cholesky factorization
        https://makarandtapaswi.wordpress.com/2011/07/08/cholesky-decomposition-for-matrix-inversion/
        '''
        # Compute GP prior distribution: mean and covariance matrices (eq 2.13, 2.14)
        K = self.covFunc(X,X,'K') # prior cov
        #m = T.mean(Y)*T.ones_like(Y) # prior mean
        m = self.mean*T.ones_like(Y) # prior mean

        # Compute GP joint prior distribution between training and test (eq 2.18)
        Ks = self.covFunc(X,x_test,'Ks')
        # Note: this is the self-covariance matrix of the test inputs.
        Kss = self.covFunc(x_test,x_test,'Kss',mode='self_test')

        # Compute posterior distribution with noise: L,alpha,sW,and log_likelihood.
        sn2 = T.exp(2*self.sigma_n) # noise variance of likGauss
        L = sT.cholesky(K/sn2 + T.identity_like(K))
        sl = sn2
        alpha = T.dot(sT.matrix_inverse(L.T), 
                      T.dot(sT.matrix_inverse(L), (Y-m)) ) / sl
        sW = T.ones_like(T.sum(K,axis=1)).reshape((K.shape[0],1)) / T.sqrt(sl)
        log_likelihood = T.sum(-0.5 * (T.dot((Y-m).T, alpha)) - T.sum(T.log(T.diag(L))) - X.shape[0] / 2 * T.log(2.*np.pi*sl))
        
        
        # Compute predictive distribution using the computed posterior distribution.
        fmu = m + T.dot(Ks.T, alpha) # Prediction Mu fs|f, eq 2.25 
        V = T.dot(sT.matrix_inverse(L),T.extra_ops.repeat(sW,x_test.shape[0],axis=1)*Ks)
        fs2 = Kss - (T.sum(V*V,axis=0)).reshape((1,V.shape[1])).T # Prediction Sigma, eq 2.26
        fs2 = T.maximum(fs2,0) # remove negative variance noise
        #fs2 = T.sum(fs2,axis=1) # in case x has multiple dimensions

        y_test_mu = fmu
        y_test_var = fs2 + sn2

        return K, Ks, Kss, y_test_mu, y_test_var, log_likelihood, L, alpha,V, fs2,sW
Example no. 4
    def __init__(self, mu, sigma, random_state=None):
        super(MultivariateNormal, self).__init__(mu=mu, sigma=sigma)
        # XXX: The SDP-ness of sigma should be checked upon changes

        # ndim
        self.ndim_ = self.mu.shape[0]
        self.make_(self.ndim_, "ndim_func_", args=[])

        # pdf
        L = linalg.cholesky(self.sigma)
        sigma_det = linalg.det(self.sigma)  # XXX: compute from L instead
        sigma_inv = linalg.matrix_inverse(self.sigma)  # XXX: idem

        self.pdf_ = ((1. / T.sqrt(
            (2. * np.pi)**self.ndim_ * T.abs_(sigma_det))) *
                     T.exp(-0.5 * T.sum(T.mul(
                         T.dot(self.X - self.mu, sigma_inv), self.X - self.mu),
                                        axis=1))).ravel()
        self.make_(self.pdf_, "pdf")

        # -log pdf
        self.nll_ = -T.log(self.pdf_)  # XXX: for sure this can be better
        self.make_(self.nll_, "nll")

        # self.rvs_
        self.make_(T.dot(L, self.X.T).T + self.mu, "rvs_func_")
Example no. 5
 def get_opt_A(self, tau, EPhiTPhi, YT_EPhi):
     SigInv = EPhiTPhi + (tau**-1 + 1e-4) * T.identity_like(EPhiTPhi)
     cholTauSigInv = tau**0.5 * sT.cholesky(SigInv)
     invCholTauSigInv = sT.matrix_inverse(cholTauSigInv)
     tauInvSig = invCholTauSigInv.T.dot(invCholTauSigInv)
     Sig_EPhiT_Y = tau * tauInvSig.dot(YT_EPhi.T)
     return Sig_EPhiT_Y, tauInvSig, cholTauSigInv
Example no. 6
File: gp.py Project: cc13ny/galatea
 def expected_new_y(self, x, y, new_x):
     assert new_x.ndim == 0
     beta = alloc_diag(T.alloc(1., (x.shape[0],)) * self.beta)
     C = self.kernel.gram_matrix(x) + beta
     C_inv = matrix_inverse(C)
     k = self.kernel(x, new_x)
     return T.dot(k, T.dot(C_inv, y))
Example no. 7
def l2ls_learn_basis_dual(X, S, c):
    tX = T.matrix('X')
    tS = T.matrix('S')
    tc = T.scalar('c')
    tlambdas = T.vector('lambdas')

    tXST = T.dot(tX, tS.T)
    tSSTetc = la.matrix_inverse(T.dot(tS, tS.T) + T.diag(tlambdas))

    objective = -(T.dot(tX, tX.T).trace()
                  - reduce(T.dot, [tXST, tSSTetc, tXST.T]).trace()
                  - tc*tlambdas.sum())

    objective_fn = theano.function([tlambdas],
                                   objective,
                                   givens={tX: X, tS: S, tc: c})
    objective_grad_fn = theano.function([tlambdas],
                                        T.grad(objective, tlambdas),
                                        givens={tX: X, tS: S, tc: c})

    initial_lambdas = 10*np.abs(np.random.random((S.shape[0], 1)))
    output = scipy.optimize.fmin_cg(f=objective_fn,
                                    fprime=objective_grad_fn,
                                    x0=initial_lambdas,
                                    maxiter=100,
                                    full_output=True)
    logging.debug("optimizer stats %s" % (output[1:],))
    logging.debug("optimizer lambdas %s" % output[0])

    lambdas = output[0]
    B = np.dot(np.linalg.inv(np.dot(S, S.T) + np.diag(lambdas)),
               np.dot(S, X.T)).T

    return B
Example no. 8
 def logp(self, X):
     v = self.v
     p = self.p
     S = self.S
     Z = self.Z
     result = -Z + log(det(X)) * -(v + p + 1.) / 2. - trace(S.dot(matrix_inverse(X))) / 2.
     return ifelse(gt(v, p-1), result, self.invalid) 
Example no. 9
 def get_opt_A(self, sn_trf, EPhiTPhi, XT_EPhi):
     SigInv = EPhiTPhi + (sn_trf + 1e-6) * T.identity_like(EPhiTPhi)
     cholSigInv = sT.cholesky(SigInv)
     invCholSigInv = sT.matrix_inverse(cholSigInv)
     InvSig = invCholSigInv.T.dot(invCholSigInv)
     Sig_EPhiT_X = InvSig.dot(XT_EPhi.T)
     return Sig_EPhiT_X, cholSigInv
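The identity behind this snippet (shared by the other get_opt_A variants above): if SigInv = L*L.T is its Cholesky factorization, then inv(SigInv) = inv(L).T.dot(inv(L)). A quick NumPy check of that identity, illustrative only:

    import numpy as np

    # If A = L L.T (Cholesky), then inv(A) = inv(L).T @ inv(L).
    rng = np.random.default_rng(1)
    B = rng.normal(size=(4, 4))
    A = B @ B.T + 4.0 * np.eye(4)        # symmetric positive definite
    L = np.linalg.cholesky(A)
    Linv = np.linalg.inv(L)
    assert np.allclose(Linv.T @ Linv, np.linalg.inv(A))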
Example no. 10
 def expected_new_y(self, x, y, new_x):
     assert new_x.ndim == 0
     beta = alloc_diag(T.alloc(1., (x.shape[0], )) * self.beta)
     C = self.kernel.gram_matrix(x) + beta
     C_inv = matrix_inverse(C)
     k = self.kernel(x, new_x)
     return T.dot(k, T.dot(C_inv, y))
Example no. 11
File: normal.py Project: ibab/carl
    def __init__(self, mu, sigma, random_state=None):
        super(MultivariateNormal, self).__init__(mu=mu,
                                                 sigma=sigma,
                                                 random_state=random_state,
                                                 optimizer=None)
        # XXX: The SDP-ness of sigma should be checked upon changes

        # ndim
        self.ndim_ = self.mu.shape[0]
        self.make_(self.ndim_, "ndim_func_", args=[])

        # pdf
        L = linalg.cholesky(self.sigma)
        sigma_det = linalg.det(self.sigma)  # XXX: compute from L instead
        sigma_inv = linalg.matrix_inverse(self.sigma)  # XXX: idem

        self.pdf_ = (
            (1. / T.sqrt((2. * np.pi) ** self.ndim_ * T.abs_(sigma_det))) *
            T.exp(-0.5 * T.sum(T.mul(T.dot(self.X - self.mu,
                                           sigma_inv),
                                     self.X - self.mu),
                               axis=1))).ravel()
        self.make_(self.pdf_, "pdf")

        # -log pdf
        self.nnlf_ = -T.log(self.pdf_)  # XXX: for sure this can be better
        self.make_(self.nnlf_, "nnlf")

        # self.rvs_
        self.make_(T.dot(L, self.X.T).T + self.mu, "rvs_func_")
Example no. 12
    def logp(X):
        IVI = det(V)
        return bound(
            ((n - p - 1) * log(IVI) - trace(matrix_inverse(V).dot(X)) -
             n * p * log(
             2) - n * log(IVI) - 2 * multigammaln(p, n / 2)) / 2,

            all(n > p - 1))
Example no. 13
 def s_variance(self, x):
     """Gaussian Process variance at points x"""
     K, y, var_y, N = self.kyn()
     rK = psd(K + var_y * tensor.eye(N))
     K_x = self.K_fn(self.x, x)
     var_x = 1 - diag(dots(K_x.T, matrix_inverse(rK), K_x))
     if var_x.dtype != self.dtype:
         raise TypeError('var_x dtype', var_x.dtype)
     return var_x
Example no. 14
 def build_theano_models(self, algo, algo_params):
     epsilon = 1e-6
     kl = lambda mu, sig: sig+mu**2-TT.log(sig)
     X, y = TT.dmatrices('X', 'y')
     params = TT.dvector('params')
     a, b, c, l_F, F, l_FC, FC = self.unpack_params(params)
     sig2_n, sig_f = TT.exp(2*a), TT.exp(b)
     l_FF = TT.dot(X, l_F)+l_FC
     FF = TT.concatenate((l_FF, TT.dot(X, F)+FC), 1)
     Phi = TT.concatenate((TT.cos(FF), TT.sin(FF)), 1)
     Phi = sig_f*TT.sqrt(2./self.M)*Phi
     noise = TT.log(1+TT.exp(c))
     PhiTPhi = TT.dot(Phi.T, Phi)
     A = PhiTPhi+(sig2_n+epsilon)*TT.identity_like(PhiTPhi)
     L = Tlin.cholesky(A)
     Li = Tlin.matrix_inverse(L)
     PhiTy = Phi.T.dot(y)
     beta = TT.dot(Li, PhiTy)
     alpha = TT.dot(Li.T, beta)
     mu_f = TT.dot(Phi, alpha)
     var_f = (TT.dot(Phi, Li.T)**2).sum(1)[:, None]
     dsp = noise*(var_f+1)
     mu_l = TT.sum(TT.mean(l_F, axis=1))
     sig_l = TT.sum(TT.std(l_F, axis=1))
     mu_w = TT.sum(TT.mean(F, axis=1))
     sig_w = TT.sum(TT.std(F, axis=1))
     hermgauss = np.polynomial.hermite.hermgauss(30)
     herm_x = Ts(hermgauss[0])[None, None, :]
     herm_w = Ts(hermgauss[1]/np.sqrt(np.pi))[None, None, :]
     herm_f = TT.sqrt(2*var_f[:, :, None])*herm_x+mu_f[:, :, None]
     nlk = (0.5*herm_f**2.-y[:, :, None]*herm_f)/dsp[:, :, None]+0.5*(
         TT.log(2*np.pi*dsp[:, :, None])+y[:, :, None]**2/dsp[:, :, None])
     enll = herm_w*nlk
     nlml = 2*TT.log(TT.diagonal(L)).sum()+2*enll.sum()+1./sig2_n*(
         (y**2).sum()-(beta**2).sum())+2*(X.shape[0]-self.M)*a
     penalty = (kl(mu_w, sig_w)*self.M+kl(mu_l, sig_l)*self.S)/(self.S+self.M)
     cost = (nlml+penalty)/X.shape[0]
     grads = TT.grad(cost, params)
     updates = getattr(OPT, algo)(self.params, grads, **algo_params)
     updates = getattr(OPT, 'apply_nesterov_momentum')(updates, momentum=0.9)
     train_inputs = [X, y]
     train_outputs = [cost, alpha, Li]
     self.train_func = Tf(train_inputs, train_outputs,
         givens=[(params, self.params)])
     self.train_iter_func = Tf(train_inputs, train_outputs,
         givens=[(params, self.params)], updates=updates)
     Xs, Li, alpha = TT.dmatrices('Xs', 'Li', 'alpha')
     l_FFs = TT.dot(Xs, l_F)+l_FC
     FFs = TT.concatenate((l_FFs, TT.dot(Xs, F)+FC), 1)
     Phis = TT.concatenate((TT.cos(FFs), TT.sin(FFs)), 1)
     Phis = sig_f*TT.sqrt(2./self.M)*Phis
     mu_pred = TT.dot(Phis, alpha)
     std_pred = (noise*(1+(TT.dot(Phis, Li.T)**2).sum(1)))**0.5
     pred_inputs = [Xs, alpha, Li]
     pred_outputs = [mu_pred, std_pred]
     self.pred_func = Tf(pred_inputs, pred_outputs,
         givens=[(params, self.params)])
Example no. 15
 def s_variance(self, x):
     """Gaussian Process variance at points x"""
     K, y, var_y, N = self.kyn()
     rK = psd(K + var_y * tensor.eye(N))
     K_x = self.K_fn(self.x, x)
     var_x = 1 - diag(dots(K_x.T, matrix_inverse(rK), K_x))
     if var_x.dtype != self.dtype:
         raise TypeError('var_x dtype', var_x.dtype)
     return var_x
Example no. 16
    def s_mean(self, x):
        """Gaussian Process mean at points x"""
        K, y, var_y, N = self.kyn()
        rK = psd(K + var_y * tensor.eye(N))
        alpha = tensor.dot(matrix_inverse(rK), y)

        K_x = self.K_fn(self.x, x)
        y_x = tensor.dot(alpha, K_x)
        if y_x.dtype != self.dtype:
            raise TypeError('y_x dtype', y_x.dtype)
        return y_x
Example no. 17
    def step(visible, filtered_hidden_mean_m1, filtered_hidden_cov_m1):
        A, B = transition, emission  # (h, h), (h, v)

        # Shortcuts for the filtered mean and covariance from the previous
        # time step.
        f_m1 = filtered_hidden_mean_m1  # (n, h)
        F_m1 = filtered_hidden_cov_m1  # (n, h, h)

        # Calculate mean of joint.
        hidden_mean = T.dot(f_m1, A) + hnm  # (n, h)

        visible_mean = T.dot(hidden_mean, B) + vnm  # (n, v)

        # Calculate covariance of joint.
        hidden_cov = stacked_dot(A.T, stacked_dot(F_m1, A))  # (n, h, h)

        hidden_cov += hnc

        visible_cov = stacked_dot(  # (n, v, v)
            B.T, stacked_dot(hidden_cov, B))
        visible_cov += vnc

        visible_hidden_cov = stacked_dot(hidden_cov, B)  # (n, h, v)

        visible_error = visible - visible_mean  # (n, v)

        inv_visible_cov, _ = theano.map(lambda x: matrix_inverse(x),
                                        visible_cov)  # (n, v, v)

        # I don't know a better name for this monster.
        visible_hidden_cov_T = visible_hidden_cov.dimshuffle(0, 2,
                                                             1)  # (n, v, h)
        D = stacked_dot(inv_visible_cov, visible_hidden_cov_T)

        f = (
            D * visible_error.dimshuffle(0, 1, 'x')  # (n, h)
        ).sum(axis=1)
        f += hidden_mean

        F = hidden_cov
        F -= stacked_dot(visible_hidden_cov, D)

        log_l = (
            inv_visible_cov *  # (n,)
            visible_error.dimshuffle(0, 1, 'x') *
            visible_error.dimshuffle(0, 'x', 1)).sum(axis=(1, 2))
        log_l *= -.5

        dets, _ = theano.map(lambda x: det(x), visible_cov)

        log_l -= 0.5 * T.log(dets)
        log_l -= np.log(2 * np.pi)

        return f, F, log_l
Example no. 18
 def __init__(self, v, S, *args, **kwargs):
     super(Wishart, self).__init__(*args, **kwargs)
     self.v = v
     self.S = S
     self.p = p = S.shape[0]
     self.inv_S = matrix_inverse(S)
     
     # TODO: We should pre-compute the following if the parameters are fixed
     self.invalid = theano.tensor.fill(S, nan)  # Invalid result, if v < p
     self.Z = log(2.)*(v * p / 2.) + multigammaln(p, v / 2.) - log(det(S)) * v / 2.
     self.mean = ifelse(gt(v, p-1), S / ( v - p - 1), self.invalid) 
Example no. 19
    def s_mean(self, x):
        """Gaussian Process mean at points x"""
        K, y, var_y, N = self.kyn()
        rK = psd(K + var_y * tensor.eye(N))
        alpha = tensor.dot(matrix_inverse(rK), y)

        K_x = self.K_fn(self.x, x)
        y_x = tensor.dot(alpha, K_x)
        if y_x.dtype != self.dtype:
            raise TypeError('y_x dtype', y_x.dtype)
        return y_x
Example no. 20
    def KLD_U(self, m, L_scaled,
              Kmm):  # KL between N(u|m,S) and N(u|0,Kmm), with S = L*L.T (pass in the Cholesky factor)
        M = m.shape[0]
        D = m.shape[1]
        KmmInv = sT.matrix_inverse(Kmm)

        KL_U = D * (T.sum(KmmInv.T * L_scaled.dot(L_scaled.T)) - M -
                    2.0 * T.sum(T.log(T.diag(L_scaled))) +
                    2.0 * T.sum(T.log(T.diag(sT.cholesky(Kmm)))))
        KL_U += T.sum(T.dot(KmmInv, m) * m)

        return 0.5 * KL_U
Example no. 21
    def s_nll(self):
        """ Marginal negative log likelihood of model

        :note: See RW.pdf page 37, Eq. 2.30.
        """
        K, y, var_y, N = self.kyn()
        rK = psd(K + var_y * tensor.eye(N))
        nll = (0.5 * dots(y, matrix_inverse(rK), y) +
               0.5 * tensor.log(det(rK)) + N / 2.0 * tensor.log(2 * numpy.pi))
        if nll.dtype != self.dtype:
            raise TypeError('nll dtype', nll.dtype)
        return nll
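For reference, the three terms of Eq. 2.30 (data fit, complexity penalty, normalization) written out in plain NumPy; a minimal sketch assuming a precomputed kernel matrix K, a 1-d target vector y, and a noise variance var_y (names chosen here for illustration only):

    import numpy as np

    def gp_marginal_nll(K, y, var_y):
        """Negative log marginal likelihood, RW Eq. 2.30 (illustrative sketch)."""
        N = y.shape[0]
        L = np.linalg.cholesky(K + var_y * np.eye(N))
        alpha = np.linalg.solve(L.T, np.linalg.solve(L, y))
        fit = 0.5 * y.dot(alpha)
        complexity = np.sum(np.log(np.diag(L)))       # 0.5 * log det(K + var_y*I)
        normalization = 0.5 * N * np.log(2.0 * np.pi)
        return fit + complexity + normalization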
Example no. 22
File: lds.py Project: ddofer/breze
    def step(visible, filtered_hidden_mean_m1, filtered_hidden_cov_m1):
        A, B = transition, emission                         # (h, h), (h, v)

        # Shortcuts for the filtered mean and covariance from the previous
        # time step.
        f_m1 = filtered_hidden_mean_m1                      # (n, h)
        F_m1 = filtered_hidden_cov_m1                       # (n, h, h)

        # Calculate mean of joint.
        hidden_mean = T.dot(f_m1, A) + hnm                  # (n, h)

        visible_mean = T.dot(hidden_mean, B) + vnm          # (n, v)

        # Calculate covariance of joint.
        hidden_cov = stacked_dot(
            A.T, stacked_dot(F_m1, A))                      # (n, h, h)

        hidden_cov += hnc

        visible_cov = stacked_dot(                          # (n, v, v)
            B.T, stacked_dot(hidden_cov, B))
        visible_cov += vnc

        visible_hidden_cov = stacked_dot(hidden_cov, B)     # (n, h, v)

        visible_error = visible - visible_mean              # (n, v)

        inv_visible_cov, _ = theano.map(
            lambda x: matrix_inverse(x), visible_cov)       # (n, v, v)

        # I don't know a better name for this monster.
        visible_hidden_cov_T = visible_hidden_cov.dimshuffle(0, 2, 1)   # (n, v, h)
        D = stacked_dot(inv_visible_cov, visible_hidden_cov_T)

        f = (D * visible_error.dimshuffle(0, 1, 'x')        # (n, h)
            ).sum(axis=1)
        f += hidden_mean

        F = hidden_cov
        F -= stacked_dot(visible_hidden_cov, D)

        log_l = (inv_visible_cov *                          # (n,)
            visible_error.dimshuffle(0, 1, 'x') *
            visible_error.dimshuffle(0,'x', 1)).sum(axis=(1, 2))
        log_l *= -.5

        dets, _ = theano.map(lambda x: det(x), visible_cov)

        log_l -= 0.5 * T.log(dets)
        log_l -= np.log(2 * np.pi)

        return f, F, log_l
Example no. 23
 def compile_theano_funcs(self, opt_algo, opt_params, dropout):
     self.compiled_funcs = {}
     # Compile Train & Optimization Function
     eps = 1e-5
     params = Tt.vector('params')
     X, Y = Tt.matrix('X'), Tt.matrix('Y')
     sig2, F, M, V = self.feature_maps(X, params)
     EPhi = F[-1]
     EPhiPhiT = Tt.dot(EPhi, Tt.transpose(EPhi))
     A = EPhiPhiT + (sig2 + eps) * Tt.identity_like(EPhiPhiT)
     L = Tlin.cholesky(A)
     Linv = Tlin.matrix_inverse(L)
     YPhiT = Tt.dot(Y, Tt.transpose(EPhi))
     beta = Tt.dot(YPhiT, Tt.transpose(Linv))
     alpha = Tt.dot(beta, Linv)
     mu_F = Tt.dot(alpha, EPhi)
     GOF = .5 / sig2 * Tt.sum(Tt.sum(Tt.dot(Y, (Y - mu_F).T)))
     REG = Tt.sum(Tt.log(
         Tt.diagonal(L))) + (self.N - self.D[-2]) / 2 * Tt.log(sig2)
     REG *= self.D[-1]
     KL = 0
     for h in range(self.H):
         KL += Tt.sum(Tt.sum(M[h]**2) + Tt.sum(V[h] - Tt.log(V[h] + eps)))
         KL -= self.D[h + 1] * self.D[h + 2] // 2
     obj = debug('obj', GOF + REG + KL)
     self.compiled_funcs['debug'] = Tf([X, Y], [obj],
                                       givens=[(params, self.params)])
     grads = Tt.grad(obj, params)
     updates = {self.params: grads}
     updates = getattr(Optimizer, opt_algo)(updates, **opt_params)
     updates = getattr(Optimizer, 'nesterov')(updates, momentum=0.9)
     train_inputs = [X, Y]
     train_outputs = [obj, alpha, Linv, mu_F]
     self.compiled_funcs['opt'] = Tf(train_inputs,
                                     train_outputs,
                                     givens=[(params, self.params)],
                                     updates=updates)
     self.compiled_funcs['train'] = Tf(train_inputs,
                                       train_outputs,
                                       givens=[(params, self.params)])
     # Compile Predict Function
     Linv, alpha = Tt.matrix('Linv'), Tt.matrix('alpha')
     Xs = Tt.matrix('Xs')
     sig2, Fs, _, _ = self.feature_maps(Xs, params)
     EPhis = Fs[-1]
     mu_Fs = Tt.dot(alpha, EPhis)
     std_Fs = ((sig2 * (1 + (Tt.dot(Linv, EPhis)**2).sum(0)))**0.5)[:, None]
     pred_inputs = [Xs, alpha, Linv]
     pred_outputs = [mu_Fs, std_Fs]
     self.compiled_funcs['pred'] = Tf(pred_inputs,
                                      pred_outputs,
                                      givens=[(params, self.params)])
Example no. 24
    def logp(self, X):
        n = self.n
        p = self.p
        V = self.V

        IVI = det(V)

        return bound(
            ((n - p - 1) * log(IVI) - trace(matrix_inverse(V).dot(X)) -
             n * p * log(
             2) - n * log(IVI) - 2 * multigammaln(p, n / 2)) / 2,

             n > (p - 1))
Example no. 25
    def s_nll(self):
        """ Marginal negative log likelihood of model

        :note: See RW.pdf page 37, Eq. 2.30.
        """
        K, y, var_y, N = self.kyn()
        rK = psd(K + var_y * tensor.eye(N))
        nll = (0.5 * dots(y, matrix_inverse(rK), y)
                + 0.5 * tensor.log(det(rK))
                + N / 2.0 * tensor.log(2 * numpy.pi))
        if nll.dtype != self.dtype:
            raise TypeError('nll dtype', nll.dtype)
        return nll
Example no. 26
    def s_deg_of_freedom(self):
        """
        Degrees of freedom aka "effective number of parameters"
        of kernel smoother.

        Defined pg. 25 of Rasmussen & Williams.
        """
        K, y, var_y, N = self.kyn()
        rK = psd(K + var_y * tensor.eye(N))
        dof = trace(tensor.dot(K, matrix_inverse(rK)))
        if dof.dtype != self.dtype:
            raise TypeError('dof dtype', dof.dtype)
        return dof
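In plain NumPy the quantity above is trace(K @ inv(K + var_y*I)); a one-function sketch under the same (assumed) names:

    import numpy as np

    def gp_degrees_of_freedom(K, var_y):
        """Effective number of parameters: trace(K @ inv(K + var_y*I))."""
        N = K.shape[0]
        # solve(A, K) gives inv(A) @ K; the trace is the same either way,
        # so no explicit matrix inverse is needed.
        return np.trace(np.linalg.solve(K + var_y * np.eye(N), K))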
Example no. 27
    def s_deg_of_freedom(self):
        """
        Degrees of freedom aka "effective number of parameters"
        of kernel smoother.

        Defined pg. 25 of Rasmussen & Williams.
        """
        K, y, var_y, N = self.kyn()
        rK = psd(K + var_y * tensor.eye(N))
        dof = trace(tensor.dot(K, matrix_inverse(rK)))
        if dof.dtype != self.dtype:
            raise TypeError('dof dtype', dof.dtype)
        return dof
Example no. 28
    def get_model(self, X, Y, X_test):
        #initial_params = {'m':m,'S_b':S_b,'mu':mu,'Sigma_b':Sigma_b,'Z':Z,'lhyp':lhyp,'ls':ls}
        (M, D), N, Q = self.Z.shape, X.shape[0], X.shape[1]

        # constrain the variables to take positive values
        beta, sf2, l = T.exp(self.ls), T.exp(self.lhyp[0]), T.exp(
            self.lhyp[1:])
        S = T.exp(self.S_b)
        #Sigma=T.exp(self.Sigma_b)

        # for x the covariance is diagonal, so there is no problem even without taking a square root
        # for u it is not diagonal, so a triangular matrix has to be built, e.g. via a Cholesky decomposition
        Sigma = T.tril(self.Sigma_b - T.diag(T.diag(self.Sigma_b)) +
                       T.diag(T.exp(T.diag(self.Sigma_b))))

        # scale transformation
        mu_scaled, Sigma_scaled = sf2**0.5 * self.mu, sf2**0.5 * Sigma

        # random numbers for the reparameterization trick
        srng = T.shared_randomstreams.RandomStreams(234)
        eps_NQ = srng.normal(self.m.shape)
        eps_M = srng.normal(self.mu.shape)

        # generate the samples; a single MC draw since we work with minibatches
        Xtilda = self.m + S * eps_NQ
        U = mu_scaled + Sigma_scaled * eps_M

        Kmm = self.ker.RBF(sf2, l, self.Z)
        KmmInv = sT.matrix_inverse(Kmm)
        #KmmDet=theano.sandbox.linalg.det(Kmm)

        Kmn = self.ker.RBF(sf2, l, self.Z, Xtilda)
        Knn = self.ker.RBF(sf2, l, Xtilda, Xtilda)

        Ktilda = Knn - T.dot(Kmn.T, T.dot(KmmInv, Kmn))

        Kinterval = T.dot(KmmInv, Kmn)

        mean_U = T.dot(Kinterval.T, U)
        Covariance = beta

        LL = self.log_mvn(X, mean_U, Covariance) - 0.5 * beta * T.sum(
            (T.eye(N) * Ktilda))

        #KL_X = -0.5 * (-T.sum(T.log(T.sum(Sigma,0))) + T.dot(m.T,T.dot(KmmInv,m)).squeeze() + T.sum((Sigma*KmmInv)) - M)-0.5*T.log(KmmDet)

        KL_X = self.KLD_X(self.m, S)

        KL_U = self.KLD_U(mu_scaled, Sigma_scaled, Kmm)

        return KL_X, KL_U, LL
Example no. 29
def LNLEP( theta = Th.dvector('theta'), M    = Th.dmatrix('M') ,
                       STA   = Th.dvector('STA')  , STC  = Th.dmatrix('STC'), 
                       N_spike = Th.dscalar('N_spike'), **other):
    '''
    The actual quadratic-Poisson model, as a function of theta and M, 
    without any barriers or priors.
    '''
    ImM = Th.identity_like(M)-(M+M.T)/2
    ldet = logdet(ImM) # Th.log( det( ImM) )  # logdet(ImM)
    return -0.5 * N_spike *( 
             ldet \
             - Th.sum(Th.dot(matrix_inverse(ImM),theta) * theta) \
             + 2. * Th.sum( theta * STA ) \
             + Th.sum( M * (STC + Th.outer(STA,STA)) ))
Example no. 30
def quadratic_Poisson( theta = Th.dvector('theta'), M    = Th.dmatrix('M') ,
                       STA   = Th.dvector('STA')  , STC  = Th.dmatrix('STC'), 
                       N_spike = Th.dscalar('N_spike'), logprior = 0 , 
                       **other):
    '''
    The actual quadratic-Poisson model, as a function of theta and M, 
    with a barrier on the log-det term and a prior.
    '''
    ImM = Th.identity_like(M)-(M+M.T)/2
    ldet = logdet(ImM)    # Th.log( det( ImM) )  # logdet(ImM)
    return -0.5 * N_spike *(
             ldet + logprior \
             - 1./(ldet+250.)**2. \
             - Th.sum(Th.dot(matrix_inverse(ImM),theta) * theta) \
             + 2. * Th.sum( theta * STA ) \
             + Th.sum( M * (STC + Th.outer(STA,STA)) ))
Example no. 31
def LQLEP( theta   = Th.dvector()  , M    = Th.dmatrix() ,
           STA     = Th.dvector()  , STC  = Th.dmatrix() , 
           N_spike = Th.dscalar()  , Cm1  = Th.dmatrix() , **other):
    '''
    The actual Linear-Quadratic-Exponential-Poisson log-likelihood, 
    as a function of theta and M, without any barriers or priors.
    '''
#    ImM = Th.identity_like(M)-(M+M.T)/2
    ImM = Cm1-(M+M.T)/2
    ldet = logdet(ImM)
    LQLEP = -0.5 * N_spike *( ldet - logdet(Cm1) \
             - Th.sum(Th.dot(matrix_inverse(ImM),theta) * theta) \
             + 2. * Th.sum( theta * STA ) \
             + Th.sum( M * (STC + Th.outer(STA,STA)) ))
    other.update(locals())
    return named( **other )
Example no. 32
def get_rbfnet_learning_func(f_name):
    assert f_name == 'euclidean'
    X_matrix = T.dmatrix('X')
    W_matrix = T.dmatrix('W')
    b = T.scalar('b')
    C_scalar = T.scalar('C')
    y_vector = T.dvector('y')

    H_matrix = metric_theano[f_name](X_matrix, W_matrix)
    H_rbf = np.exp(T.power(H_matrix, 2) * (-b))

    beta_matrix = T.dot(
        matrix_inverse(T.dot(H_rbf.T, H_rbf) + 1.0 / C_scalar * T.eye(H_rbf.shape[1])),
        T.dot(H_rbf.T, y_vector).T)
    # beta_function = theano.function([H_matrix, C_scalar, y_vector], beta_matrix)
    rbfnet_learning_function = theano.function([X_matrix, W_matrix, C_scalar, b, y_vector],
                                               beta_matrix)
    return rbfnet_learning_function
Example no. 33
    def step(filtered_mean, filtered_cov,
             smoothed_mean_p1, smoothed_cov_p1):
        f, F = filtered_mean, filtered_cov                  # (n, h), (n, h, h)

        hidden_mean = T.dot(f, A) + hidden_noise_mean       # (n, h)

        hidden_cov = stacked_dot(A.T,
                                 stacked_dot(F, A))         # (n, h, h)
        hidden_cov += hidden_noise_cov

        hidden_p1_hidden_cov = stacked_dot(A.T, F)            # (n, h, h)

        hidden_p1_hidden_cov_T = hidden_p1_hidden_cov.dimshuffle(0, 2, 1)

        inv_hidden_cov, _ = theano.map(
            lambda x: matrix_inverse(x), hidden_cov)        # (n, h, h)

        cov_rev = F - stacked_dot(
            stacked_dot(hidden_p1_hidden_cov_T, inv_hidden_cov),
            hidden_p1_hidden_cov)                           # (n, h, h)

        trans_rev = stacked_dot(hidden_p1_hidden_cov_T,     # (n, h, h)
                                inv_hidden_cov)

        mean_rev = f
        mean_rev -= (hidden_mean.dimshuffle(0, 'x', 1) * trans_rev # (n, h)
                    ).sum(axis=2)

        # Turn these into matrices so they work with stacked_dot.
        smoothed_mean_p1 = smoothed_mean_p1.dimshuffle(0, 'x', 1)

        trans_rev_T = trans_rev.dimshuffle(0, 2, 1)
        smoothed_mean = stacked_dot(smoothed_mean_p1, trans_rev_T)
        smoothed_mean = smoothed_mean[0, :, :]
        smoothed_mean += mean_rev

        smoothed_cov = stacked_dot(trans_rev,
                                   stacked_dot(smoothed_cov_p1, trans_rev_T))
        
        smoothed_cov += cov_rev

        return smoothed_mean, smoothed_cov
Example no. 34
def s_nll(K, y, var_y, prior_var):
    """
    Marginal negative log likelihood of model

    K - gram matrix (matrix-like)
    y - the training targets (vector-like)
    var_y - the variance of uncertainty about y (vector-like)

    :note: See RW.pdf page 37, Eq. 2.30.

    """

    n = y.shape[0]
    rK = psd(prior_var * K + var_y * TT.eye(n))

    fit = .5 * dots(y, matrix_inverse(rK), y)
    complexity = 0.5 * TT.log(det(rK))
    normalization = n / 2.0 * TT.log(2 * np.pi)
    nll = fit + complexity + normalization
    return nll
Example no. 35
def l2ls_learn_basis_dual(X, S, c):
    tX = T.matrix('X')
    tS = T.matrix('S')
    tc = T.scalar('c')
    tlambdas = T.vector('lambdas')

    tXST = T.dot(tX, tS.T)
    tSSTetc = la.matrix_inverse(T.dot(tS, tS.T) + T.diag(tlambdas))

    objective = -(T.dot(tX, tX.T).trace() - reduce(
        T.dot, [tXST, tSSTetc, tXST.T]).trace() - tc * tlambdas.sum())

    objective_fn = theano.function([tlambdas],
                                   objective,
                                   givens={
                                       tX: X,
                                       tS: S,
                                       tc: c
                                   })
    objective_grad_fn = theano.function([tlambdas],
                                        T.grad(objective, tlambdas),
                                        givens={
                                            tX: X,
                                            tS: S,
                                            tc: c
                                        })

    initial_lambdas = 10 * np.abs(np.random.random((S.shape[0], 1)))
    output = scipy.optimize.fmin_cg(f=objective_fn,
                                    fprime=objective_grad_fn,
                                    x0=initial_lambdas,
                                    maxiter=100,
                                    full_output=True)
    logging.debug("optimizer stats %s" % (output[1:], ))
    logging.debug("optimizer lambdas %s" % output[0])

    lambdas = output[0]
    B = np.dot(np.linalg.inv(np.dot(S, S.T) + np.diag(lambdas)),
               np.dot(S, X.T)).T

    return B
Example no. 36
def normal(X, m, C):
    """
    Evaluates the density of a normal distribution.

    @type  X: C{TensorVariable}
    @param X: matrix storing data points column-wise

    @type  m: C{ndarray}/C{TensorVariable}
    @param m: column vector representing the mean of the Gaussian

    @type  C: C{ndarray}/C{TensorVariable}
    @param C: covariance matrix

    @rtype: C{TensorVariable}
    @return: density of a Gaussian distribution evaluated at C{X}
    """

    Z = X - m

    return tt.exp(-tt.sum(Z * tt.dot(tl.matrix_inverse(C), Z), 0) / 2. -
                  tt.log(tl.det(C)) / 2. - m.size / 2. * np.log(2. * np.pi))
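A NumPy counterpart of the same density formula, handy for checking the compiled Theano graph against known values (a sketch only; as in the docstring, X stores the data points column-wise and m is a column vector):

    import numpy as np

    def normal_np(X, m, C):
        """Multivariate normal density evaluated at the columns of X."""
        Z = X - m                                     # m is a (d, 1) column vector
        maha = np.sum(Z * np.linalg.solve(C, Z), axis=0)
        _, logdet = np.linalg.slogdet(C)
        d = m.size
        return np.exp(-0.5 * maha - 0.5 * logdet - 0.5 * d * np.log(2.0 * np.pi))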
Example no. 37
def get_eem_learning_function(metric_name):
    W = T.dmatrix('W')
    X = T.dmatrix('X')
    H = metric_theano[metric_name](X, W)
    H_func = theano.function([X, W], H)
    C = T.scalar('C')

    H_plus = T.dmatrix('H_plus')
    H_minus = T.dmatrix('H_minus')

    sigma_plus = T.dmatrix('sigma_plus')
    sigma_minus = T.dmatrix('sigma_minus')

    sigma_plus_reg = sigma_plus + T.eye(sigma_plus.shape[1]) / 2 * C
    sigma_minus_reg = sigma_minus + T.eye(sigma_minus.shape[1]) / 2 * C

    m_plus = H_plus.mean(axis=0).T
    m_minus = H_minus.mean(axis=0).T

    mean_diff = m_plus - m_minus
    beta = (2 * T.dot(matrix_inverse(sigma_plus_reg + sigma_minus_reg), mean_diff)
            / mean_diff.norm(L=2))
    func = theano.function([H_plus, H_minus, sigma_plus, sigma_minus, C],
                           [beta, sigma_plus_reg, sigma_minus_reg, m_plus, m_minus])

    def eem_learning_function(X, W, y, C):
        the_H = H_func(X, W)

        the_H_plus = the_H[y == 1]
        the_H_minus = the_H[y == -1]

        the_sigma_plus = LedoitWolf(store_precision=False).fit(the_H_plus).covariance_
        the_sigma_minus = LedoitWolf(store_precision=False).fit(the_H_minus).covariance_

        if C is None:
            C = 0

        return func(the_H_plus, the_H_minus, the_sigma_plus, the_sigma_minus, C)

    return eem_learning_function
Example no. 38
    def _build_graph(self):
        """Sets up the gaussian process's tensor variables."""
        X = self.X
        Y = self.Y
        x = self.x
        reg = self.reg

        if self._normalize_y:
            Y_mean = T.mean(Y, axis=0)
            Y_variance = T.std(Y, axis=0)
            Y = (Y - Y_mean) / Y_variance

        # Kernel functions.
        K_ss = self._kernel(x, x)
        K_s = self._kernel(x, X)
        K = self._kernel(X, X) + self._sigma_n**2 * T.eye(X.shape[0])

        # Guarantee positive definite.
        K = 0.5 * (K + K.T) + reg * T.eye(K.shape[0])

        # Mean and variance functions.
        K_inv = sT.matrix_inverse(K)
        mu = T.dot(K_s, T.dot(K_inv, self.Y))  # Non-normalized Y for scale.
        var = K_ss - T.dot(K_s, T.dot(K_inv, K_s.T))

        # Compute the standard deviation.
        L = sT.cholesky(K)
        L_k = T.slinalg.solve_lower_triangular(L, K_s.T)
        std = T.sqrt(T.diag(K_ss) - T.sum(L_k**2, axis=0)).reshape((-1, 1))

        # Compute the log likelihood.
        log_likelihood_dims = -0.5 * T.dot(Y.T, T.dot(K_inv, Y)).sum(axis=0)
        log_likelihood_dims -= T.log(T.diag(L)).sum()
        log_likelihood_dims -= L.shape[0] / 2 * T.log(2 * np.pi)
        log_likelihood = log_likelihood_dims.sum(axis=-1)

        self._mu = mu
        self._var = var
        self._std = std
        self._log_likelihood = log_likelihood
Example no. 39
    def generate_optimize_basis():
        # original solution
        tx0 = partial.x
        # optimized solution
        tx1 = T.dot(tl.matrix_inverse(T.dot(partial.A.T, partial.A)),
                    T.dot(partial.A.T, y) - gamma/2*partial.theta)

        # investigate zero crossings between tx0 and tx1
        tbetas = tx0 / (tx0 - tx1)
        # investigate tx1
        tbetas = T.concatenate([tbetas, [1.0]])
        # only between tx0 and inclusively tx1
        tbetas = tbetas[(T.lt(0, tbetas) * T.le(tbetas, 1)).nonzero()]

        txbs, _ = theano.map(lambda b: (1-b)*tx0 + b*tx1, [tbetas])
        tlosses, _ = theano.map(loss, [txbs])
        # select the optimum
        txb = txbs[T.argmin(tlosses)]

        return theano.function([tpart, full.x, full.theta],
                               [T.set_subtensor(partial.x,     txb),
                                T.set_subtensor(partial.theta, T.sgn(txb))])
Example no. 40
def get_xelm_learning_function(f_name):
    # global xelm_learning_function


    X_matrix = T.dmatrix('X')
    W_matrix = T.dmatrix('W')
    # b_vector = T.dvector('b')
    w_vector = T.dvector('w')
    C_scalar = T.scalar('C')
    y_vector = T.dvector('y')

    H_matrix = metric_theano[f_name](X_matrix, W_matrix)

    Hw_matrix = H_matrix * w_vector.reshape((-1, 1))
    yw_vector = (y_vector * w_vector)

    beta_matrix = T.dot(
        matrix_inverse(T.dot(Hw_matrix.T, Hw_matrix) + 1.0 / C_scalar * T.eye(Hw_matrix.shape[1])),
        T.dot(Hw_matrix.T, yw_vector).T)
    # beta_function = theano.function([H_matrix, C_scalar, y_vector], beta_matrix)
    xelm_learning_function = theano.function([X_matrix, W_matrix, w_vector, C_scalar, y_vector],
                                             beta_matrix)
    return xelm_learning_function
Example no. 41
    def __init__(self, mu, sigma):
        """Constructor.

        Parameters
        ----------
        * `mu` [1d array]:
            The means.

        * `sigma` [2d array]:
            The covariance matrix.
        """
        super(MultivariateNormal, self).__init__(mu=mu, sigma=sigma)
        # XXX: The SDP-ness of sigma should be checked upon changes

        # ndim
        self.ndim_ = self.mu.shape[0]
        self._make(self.ndim_, "ndim_func_", args=[])

        # pdf
        L = linalg.cholesky(self.sigma)
        sigma_det = linalg.det(self.sigma)  # XXX: compute from L instead
        sigma_inv = linalg.matrix_inverse(self.sigma)  # XXX: idem

        self.pdf_ = (
            (1. / T.sqrt((2. * np.pi) ** self.ndim_ * T.abs_(sigma_det))) *
            T.exp(-0.5 * T.sum(T.mul(T.dot(self.X - self.mu,
                                           sigma_inv),
                                     self.X - self.mu),
                               axis=1))).ravel()
        self._make(self.pdf_, "pdf")

        # -log pdf
        self.nll_ = -T.log(self.pdf_)  # XXX: for sure this can be better
        self._make(self.nll_, "nll")

        # self.rvs_
        self._make(T.dot(L, self.X.T).T + self.mu, "rvs_func_")
Example no. 42

    def __init__(self, D, M, Q, Domain_number, m, pre_params, Pre_U,
                 Hiddenlayerdim1, Hiddenlayerdim2):

        self.Xlabel = T.matrix('Xlabel')

        self.X = T.matrix('X')
        N = self.X.shape[0]

        self.Weight = T.matrix('Weight')

        ker = kernel(Q)
        #mmd=MMD(M,Domain_number)

        mu_value = np.random.randn(M, D)
        Sigma_b_value = np.zeros((M, M)) + np.log(0.01)

        Z_value = m[:M]
        self.test = Z_value
        ls_value = np.zeros(Domain_number) + np.log(0.1)

        self.mu = theano.shared(value=mu_value, name='mu', borrow=True)
        self.Sigma_b = theano.shared(value=Sigma_b_value,
                                     name='Sigma_b',
                                     borrow=True)
        self.Z = theano.shared(value=Z_value, name='Z', borrow=True)
        self.ls = theano.shared(value=ls_value, name='ls', borrow=True)

        self.params = [self.mu, self.Sigma_b, self.Z, self.ls]

        self.hiddenLayer_x = HiddenLayer(rng=rng,
                                         input=self.X,
                                         n_in=D,
                                         n_out=Hiddenlayerdim1,
                                         activation=T.nnet.relu,
                                         number='_x')
        self.hiddenLayer_hidden = HiddenLayer(rng=rng,
                                              input=self.hiddenLayer_x.output,
                                              n_in=Hiddenlayerdim1,
                                              n_out=Hiddenlayerdim2,
                                              activation=T.nnet.relu,
                                              number='_h')
        self.hiddenLayer_m = HiddenLayer(rng=rng,
                                         input=self.hiddenLayer_hidden.output,
                                         n_in=Hiddenlayerdim2,
                                         n_out=Q,
                                         activation=T.nnet.relu,
                                         number='_m')
        self.hiddenLayer_S = HiddenLayer(rng=rng,
                                         input=self.hiddenLayer_hidden.output,
                                         n_in=Hiddenlayerdim2,
                                         n_out=Q,
                                         activation=T.nnet.relu,
                                         number='_S')

        self.loc_params = []
        self.loc_params.extend(self.hiddenLayer_x.params)
        self.loc_params.extend(self.hiddenLayer_hidden.params)
        self.loc_params.extend(self.hiddenLayer_m.params)
        self.loc_params.extend(self.hiddenLayer_S.params)

        self.local_params = {}
        for i in self.loc_params:
            self.local_params[str(i)] = i

        self.params.extend(ker.params)
        #self.params.extend(mmd.params)

        self.hyp_params = {}
        for i in [self.mu, self.Sigma_b, self.ls]:
            self.hyp_params[str(i)] = i

        self.Z_params = {}
        for i in [self.Z]:
            self.Z_params[str(i)] = i

        self.global_params = {}
        for i in self.params:
            self.global_params[str(i)] = i

        self.params.extend(self.hiddenLayer_x.params)
        self.params.extend(self.hiddenLayer_hidden.params)
        self.params.extend(self.hiddenLayer_m.params)
        self.params.extend(self.hiddenLayer_S.params)

        self.wrt = {}
        for i in self.params:
            self.wrt[str(i)] = i

        for i, j in pre_params.items():
            self.wrt[i].set_value(j)

        for i, j in Pre_U.items():
            self.wrt[i].set_value(j)

        m = self.hiddenLayer_m.output
        S_0 = self.hiddenLayer_S.output
        S_1 = T.exp(S_0)
        S = T.sqrt(S_1)

        from theano.tensor.shared_randomstreams import RandomStreams
        srng = RandomStreams(seed=234)
        eps_NQ = srng.normal((N, Q))
        eps_M = srng.normal((M, D))  # the mean and the variance need different random numbers, so they are named separately

        beta = T.exp(self.ls)
        # for u the covariance is not diagonal, so a triangular matrix has to be built, e.g. via a Cholesky decomposition

        Sigma = T.tril(self.Sigma_b - T.diag(T.diag(self.Sigma_b)) +
                       T.diag(T.exp(T.diag(self.Sigma_b))))

        # scale transformation
        mu_scaled, Sigma_scaled = ker.sf2**0.5 * self.mu, ker.sf2**0.5 * Sigma

        Xtilda = m + S * eps_NQ
        self.U = mu_scaled + Sigma_scaled.dot(eps_M)

        Kmm = ker.RBF(self.Z)
        #Kmm=mmd.MMD_kenel_Xonly(mmd.Zlabel_T,Kmm,self.Weight)
        KmmInv = sT.matrix_inverse(Kmm)

        Kmn = ker.RBF(self.Z, Xtilda)
        #Kmn=mmd.MMD_kenel_ZX(self.Xlabel,Kmn,self.Weight)

        Knn = ker.RBF(Xtilda)
        #Knn=mmd.MMD_kenel_Xonly(self.Xlabel,Knn,self.Weight)

        Ktilda = Knn - T.dot(Kmn.T, T.dot(KmmInv, Kmn))

        Kinterval = T.dot(KmmInv, Kmn)

        mean_U = T.dot(Kinterval.T, self.U)
        betaI = T.diag(T.dot(self.Xlabel, beta))
        Covariance = betaI

        self.LL = (self.log_mvn(self.X, mean_U, Covariance) -
                   0.5 * T.sum(T.dot(betaI, Ktilda)))
        self.KL_X = -self.KLD_X(m, S)
        self.KL_U = -self.KLD_U(mu_scaled, Sigma_scaled, Kmm, KmmInv)
Example no. 43

    def __init__(self, params, correct, samples=20, batch_size=None):
        ker = kernel()
        self.samples = samples
        self.params = params
        self.batch_size = batch_size

        # file for saving the model
        model_file_name = 'model2' + '.save'
        # if a previously built model exists, load it
        try:
            print('Trying to load model...')
            with open(model_file_name, 'rb') as file_handle:
                obj = pickle.load(file_handle)
                self.f, self.g = obj
                print('Loaded!')
            return
        except:
            print('Failed. Creating a new model...')

        X,Y,X_test,m,S_b,mu,Sigma_b,Z,eps_NQ,eps_M =\
        T.dmatrices('X','Y','X_test','m','S_b','mu','Sigma_b','Z','eps_NQ','eps_M')

        lhyp = T.dvector('lhyp')
        ls = T.dvector('ls')

        (M, D), N, Q = Z.shape, X.shape[0], X.shape[1]

        # constrain the variables to take positive values
        beta = T.exp(ls[0])
        #beta=T.exp(lhyp[0])
        sf2, l = T.exp(lhyp[0]), T.exp(lhyp[1:1 + Q])

        S = T.exp(S_b)
        #Sigma=T.exp(self.Sigma_b)

        # for x the covariance is diagonal, so there is no problem even without taking a square root
        # for u it is not diagonal, so a triangular matrix has to be built, e.g. via a Cholesky decomposition
        Sigma = T.tril(Sigma_b - T.diag(T.diag(Sigma_b)) +
                       T.diag(T.exp(T.diag(Sigma_b))))

        # scale transformation
        mu_scaled, Sigma_scaled = sf2**0.5 * mu, sf2**0.5 * Sigma

        Xtilda = m + S * eps_NQ
        U = mu_scaled + Sigma_scaled.dot(eps_M)

        print('Setting up cache...')

        Kmm = ker.RBF(sf2, l, Z)
        KmmInv = sT.matrix_inverse(Kmm)
        #KmmDet=theano.sandbox.linalg.det(Kmm)

        #KmmInv_cache = sT.matrix_inverse(Kmm)
        #self.fKmm = theano.function([Z, lhyp], Kmm, name='Kmm')
        #self.f_KmmInv = theano.function([Z, lhyp], KmmInv_cache, name='KmmInv_cache')
        # Recap: this compiles the function KmmInv_cache with Z and lhyp as arguments,
        # i.e. the inverse matrix becomes a function of Z and the hyperparameters
        #self.update_KmmInv_cache()  # actually plugs in numbers and computes KmmInv
        # builds the derivative functions of the inverse matrix

        #self.dKmm_d = {'Z': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), Z), name='dKmm_dZ'),
        #               'lhyp': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), lhyp), name='dKmm_dlhyp')}

        print('Modeling...')

        Kmn = ker.RBF(sf2, l, Z, Xtilda)
        Knn = ker.RBF(sf2, l, Xtilda, Xtilda)

        Ktilda = Knn - T.dot(Kmn.T, T.dot(KmmInv, Kmn))

        Kinterval = T.dot(KmmInv, Kmn)

        mean_U = T.dot(Kinterval.T, U)
        Covariance = beta

        LL = (self.log_mvn(X, mean_U, Covariance) - 0.5 * beta * T.sum(
            (T.eye(N) * Ktilda))) * correct
        KL_X = -self.KLD_X(m, S) * correct
        KL_U = -self.KLD_U(mu_scaled, Sigma_scaled, Kmm, KmmInv)

        print('Compiling model ...')

        inputs = {
            'X': X,
            'Z': Z,
            'm': m,
            'S_b': S_b,
            'mu': mu,
            'Sigma_b': Sigma_b,
            'lhyp': lhyp,
            'ls': ls,
            'eps_M': eps_M,
            'eps_NQ': eps_NQ
        }

        z = 0.0 * sum([
            T.sum(v) for v in inputs.values()
        ])  # solve a bug with derivative wrt inputs not in the graph

        self.f = {n: theano.function(list(inputs.values()), f+z, name=n, on_unused_input='ignore')\
                  for n,f in zip(['X', 'U', 'LL', 'KL_U', 'KL_X'], [X, U, LL, KL_U, KL_X])}

        wrt = {
            'Z': Z,
            'm': m,
            'S_b': S_b,
            'mu': mu,
            'Sigma_b': Sigma_b,
            'lhyp': lhyp,
            'ls': ls
        }
        self.g = {
            vn: {
                gn: theano.function(list(inputs.values()),
                                    T.grad(gv + z, vv),
                                    name='d' + gn + '_d' + vn,
                                    on_unused_input='ignore')
                for gn, gv in zip(['LL', 'KL_U', 'KL_X'], [LL, KL_U, KL_X])
            }
            for vn, vv in wrt.items()
        }

        with open(model_file_name, 'wb') as file_handle:
            print('Saving model...')
            sys.setrecursionlimit(2000)
            pickle.dump([self.f, self.g],
                        file_handle,
                        protocol=pickle.HIGHEST_PROTOCOL)
Example no. 44

    def __init__(self, rng,input_m,input_S, n_in, n_out,inducing_number,Domain_number=None,
                 liklihood="Gaussian",Domain_consideration=True,number="1",kernel_name='X'):

        m=input_m
        self.cal=input_m
        S_0=input_S
        
        self.N=m.shape[0]
        D=n_out
        Q=n_in
        M=inducing_number
        
        #set_initial_value
        ker=kernel(Q,kernel_name)
        self.kern=ker
        mu_value = np.random.randn(M,D)* 1e-2
        Sigma_b_value = np.zeros((M,M))
        Z_value = np.random.randn(M,Q)
        if Domain_consideration:
            ls_value=np.zeros(Domain_number)+np.log(0.1)
        else:
            ls_value=np.zeros(1)+np.log(0.1)
        
        self.mu = theano.shared(value=mu_value, name='mu'+number, borrow=True)
        self.Sigma_b = theano.shared(value=Sigma_b_value, name='Sigma_b'+number, borrow=True)
        self.Z = theano.shared(value=Z_value, name='Z'+number, borrow=True)
        self.ls = theano.shared(value=ls_value, name='ls'+number, borrow=True)
        
        self.params = [self.mu,self.Sigma_b,self.Z,self.ls]
        
        
        self.params.extend(ker.params)
        
        self.hyp_params_list=[self.mu,self.Sigma_b,self.ls]
        self.Z_params_list=[self.Z]        
        self.global_params_list=self.params
        
        S_1=T.exp(S_0)
        S=T.sqrt(S_1)
        
        from theano.tensor.shared_randomstreams import RandomStreams
        srng = RandomStreams(seed=234)
        eps_NQ = srng.normal((100,self.N,Q))
        eps_M = srng.normal((100,M,D))  # the mean and the variance need different random numbers, so they are named separately
        eps_ND = srng.normal((100,self.N,D))
                          
        self.beta = T.exp(self.ls)
        # for u the covariance is not diagonal, so a triangular matrix has to be built, e.g. via a Cholesky decomposition

        Sigma = T.tril(self.Sigma_b - T.diag(T.diag(self.Sigma_b)) + T.diag(T.exp(T.diag(self.Sigma_b))))
        
        # scale transformation
        mu_scaled, Sigma_scaled = ker.sf2**0.5 * self.mu, ker.sf2**0.5 * Sigma
        
        #Xtilda = m[None,:,:] + S[None,:,:] * eps_NQ
        Xtilda, updates = theano.scan(fn=lambda a: m+S*a,
                              sequences=[eps_NQ])
                   
        #self.U = mu_scaled[None,:,:]+Sigma_scaled[None,:,:].dot(eps_M)
        self.U, updates = theano.scan(fn=lambda a: mu_scaled+Sigma_scaled.dot(a),
                              sequences=[eps_M])
        
        Kmm = ker.RBF(self.Z)
        KmmInv = sT.matrix_inverse(Kmm) 
        
        Knn, updates = theano.scan(fn=lambda a: self.kern.RBF(a),
                              sequences=[Xtilda])
        
        Kmn, updates = theano.scan(fn=lambda a: self.kern.RBF(self.Z,a),
                              sequences=[Xtilda])
        
        #Kmn = ker.RBF(self.Z,Xtilda)
        
        #Knn = ker.RBF(Xtilda)
        Ktilda, updates = theano.scan(fn=lambda a,b: a-T.dot(b.T,T.dot(KmmInv,b)),
                              sequences=[Knn,Kmn])
        #Ktilda=Knn-T.dot(Kmn.T,T.dot(KmmInv,Kmn))
        
        
        F, updates = theano.scan(fn=lambda a,b,c,d: T.dot(a.T,T.dot(KmmInv,b)) + T.dot(T.maximum(c, 1e-16)**0.5,d),
                              sequences=[Kmn,self.U,Ktilda,eps_ND])
        #F = T.dot(Kmn.T,T.dot(KmmInv,self.U)) + T.dot(T.maximum(Ktilda, 1e-16)**0.5,eps_ND)
        
        #Kinterval=T.dot(KmmInv,Kmn)

        self.mean_U=F
        #mean_U=T.dot(Kinterval.T,self.U)
        
        #A=Kinterval.T      
        #Sigma_tilda=Ktilda+T.dot(A,T.dot(Sigma_scaled,A.T))
        #mean_tilda=T.dot(A,mu_scaled)        
        #self.mean_U=mean_tilda + T.dot(T.maximum(Sigma_tilda, 1e-16)**0.5,eps_ND)

        
        self.output=self.mean_U
        self.KL_X = -self.KLD_X(m,S)
        self.KL_U = -self.KLD_U(mu_scaled , Sigma_scaled , Kmm,KmmInv)
Example no. 45
 def kreg(xtrain, ytrain, xtest):
     return kernmat(xtest, xtrain, sigma).dot(
         Tlina.matrix_inverse(
             kernmat(xtrain, xtrain, sigma) +
             theta * T.eye(xtrain.shape[0])).dot(ytrain))
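This one-liner is kernel ridge regression: k(x_*, X) (K + theta*I)^-1 y. A NumPy sketch with a toy RBF kernel, assuming row-wise data matrices (the helper names here are hypothetical, not the kernmat used above):

    import numpy as np

    def rbf_kernmat(A, B, sigma):
        """Toy RBF kernel matrix between row-wise point sets A and B."""
        d2 = np.sum((A[:, None, :] - B[None, :, :]) ** 2, axis=-1)
        return np.exp(-d2 / (2.0 * sigma ** 2))

    def kreg_np(xtrain, ytrain, xtest, sigma=1.0, theta=1e-3):
        """Kernel ridge prediction: k(x_*, X) (K + theta*I)^-1 y."""
        K = rbf_kernmat(xtrain, xtrain, sigma)
        w = np.linalg.solve(K + theta * np.eye(xtrain.shape[0]), ytrain)
        return rbf_kernmat(xtest, xtrain, sigma).dot(w)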
Example no. 46
    def _problem_MERLiNbp(self, icoh=False):
        '''
        Set up cost function and return the pymanopt problem of the
        MERLiNbp algorithm ([1], Algorithm 4) or the MERLiNbpicoh
        algorithm ([1], Algorithm 5)

        Input (default)
            - icoh (False)
                False = set up MERLiNbp, True = set up MERLiNbpicoh

        Sets/updates
        self._problem_MERLiNbp_val or self._problem_MERLiNbpicoh_val
        and the shared theano variables
        self._T_S, self._T_Vi, self._T_Vr, self._T_Fi, self._T_Fr, self._T_n
        '''
        if (not icoh and self._problem_MERLiNbp_val is None) or (
                icoh and self._problem_MERLiNbpicoh_val is None):
            S = self._T_S = TS.shared(self._S)
            Vi = self._T_Vi = TS.shared(self._C[0])
            Vr = self._T_Vr = TS.shared(self._C[1])
            Fi = self._T_Fi = TS.shared(self._F[0].reshape(self._d, -1))
            Fr = self._T_Fr = TS.shared(self._F[1].reshape(self._d, -1))
            n = self._T_n = TS.shared(self._n)
            w = T.matrix()
            m = self._m
            # linear combination
            wFr = T.reshape(w.T.dot(Fr), (m, -1))  # m x n'
            wFi = T.reshape(w.T.dot(Fi), (m, -1))  # m x n'

            # replace zeros, since we're taking logs
            def unzero(x):
                return T.switch(T.eq(x, 0), 1, x)

            # bandpower
            def bp(re, im):
                return T.reshape(
                    T.mean(T.log(unzero(T.sqrt(re * re + im * im))) - T.log(n),
                           axis=1), (m, 1))

            wFbp = bp(wFr, wFi)  # m x 1
            vFbp = bp(Vr, Vi)  # m x 1
            # centering matrix
            I = T.eye(m, m)
            H = I - T.mean(I)
            # column-centered data
            X = H.dot(T.concatenate([S, vFbp, wFbp], axis=1))  # m x 3
            # covariance matrix
            S = X.T.dot(X) / (m - 1)
            # precision matrix
            prec = Tlina.matrix_inverse(S)
            # MERLiNbpicoh
            if icoh:
                # complex row-wise vdot
                # (x+yi)(u+vi) = (xu-yv)+(xv+yu)i
                # vdot i.e. -v instead of +v
                def vdot(x, y, u, v):
                    return x * u + y * v

                def vdoti(x, y, u, v):
                    return -x * v + y * u

                def cross(x, y, u, v):
                    return T.sum(vdot(x, y, u, v), axis=0) / m

                def crossi(x, y, u, v):
                    return T.sum(vdoti(x, y, u, v), axis=0) / m

                def sqrtcross(x, y):
                    return T.sqrt(cross(x, y, x, y) + crossi(x, y, x, y))

                icoherency = crossi(Vr, Vi, wFr, wFi) / (
                    sqrtcross(Vr, Vi) * sqrtcross(wFr, wFi))  # n'
                cost = -(T.abs_(T.sum(icoherency)) * T.abs_(prec[1, 2]) -
                         T.abs_(prec[0, 2]))
                self._problem_MERLiNbpicoh_val = Problem(manifold=None,
                                                         cost=cost,
                                                         arg=w,
                                                         verbosity=0)
            # MERLiNbp
            else:
                cost = -(T.abs_(prec[1, 2]) - T.abs_(prec[0, 2]))
                self._problem_MERLiNbp_val = Problem(manifold=None,
                                                     cost=cost,
                                                     arg=w,
                                                     verbosity=0)
        else:
            self._T_S.set_value(self._S)
            self._T_Vi.set_value(self._C[0])
            self._T_Vr.set_value(self._C[1])
            self._T_Fi.set_value(self._F[0].reshape(self._d, -1))
            self._T_Fr.set_value(self._F[1].reshape(self._d, -1))
            self._T_n.set_value(self._n)
        if not icoh:
            return self._problem_MERLiNbp_val
        else:
            return self._problem_MERLiNbpicoh_val
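A hedged usage sketch (an illustration, not part of the source): assuming an older, Theano-compatible pymanopt release and an already constructed instance `merlin` of the class above (a hypothetical name), the returned problem could be optimised over unit-norm spatial filters w:

from pymanopt.manifolds import Sphere
from pymanopt.solvers import SteepestDescent

problem = merlin._problem_MERLiNbp(icoh=False)   # merlin: hypothetical instance of the class above
problem.manifold = Sphere(merlin._d, 1)          # w in R^{d x 1} with ||w|| = 1 (assumed setup)
solver = SteepestDescent()
w_opt = solver.solve(problem)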
Esempio n. 47
0
    def __init__(self, init_w):
        self.w = sharedX(init_w)
        self.b = sharedX(0.)

        params = [self.w]

        X = T.matrix()
        y = T.vector()

        X.tag.test_value = np.zeros((100, 784), dtype='float32')
        y.tag.test_value = np.zeros((100, ), dtype='float32')

        self.cost = function([X, y], self.cost_samples(X, y).mean())
        alpha = T.scalar()
        alpha.tag.test_value = 1.

        cost_samples = self.cost_samples(X, y)
        assert cost_samples.ndim == 1

        cost = cost_samples.mean()
        assert cost.ndim == 0

        updates = {}

        for param in params:
            updates[param] = param - alpha * T.grad(cost, param)

        self.sgd_step = function([X, y, alpha], updates=updates)

        num_samples = cost_samples.shape[0]
        cost_variance = T.sqr(cost_samples - cost).sum() / (num_samples - 1)
        cost_std = T.sqrt(cost_variance)
        assert cost_std.ndim == 0

        caution = -2.

        bound = cost + caution * cost_std / T.sqrt(num_samples)

        updates = {}

        for param in params:
            updates[param] = param - alpha * T.grad(cost, param)

        self.do_step = function([X, y, alpha], updates=updates)
        self.experimental_step = function(
            [X, y, alpha],
            # take the gradient wrt self.w explicitly rather than the leftover loop variable `param`
            updates={self.w: self.w - alpha * T.grad(bound, self.w)})

        alphas = T.vector()
        alphas.tag.test_value = np.ones((2, ), dtype='float32')

        #also tried using grad of bound instead of cost (got to change it in do_step as well)
        W = self.w.dimshuffle(0, 'x') - T.grad(cost, self.w).dimshuffle(
            0, 'x') * alphas.dimshuffle('x', 0)
        B = self.b.dimshuffle('x') - T.grad(cost,
                                            self.b).dimshuffle('x') * alphas

        Z = T.dot(X, W) + B
        C = y.dimshuffle(0, 'x') * T.nnet.softplus(-Z) + (
            1 - y.dimshuffle(0, 'x')) * T.nnet.softplus(Z)

        means = C.mean(axis=0)
        variances = T.sqr(C - means).sum(axis=0) / (num_samples - 1)
        stds = T.sqrt(variances)
        bounds = means + caution * stds / T.sqrt(num_samples)

        self.eval_bounds = function([X, y, alphas], bounds)

        W = T.concatenate([self.w.dimshuffle('x', 0)] * batch_size, axis=0)

        z = (X * W).sum(axis=1)

        C = y * T.nnet.softplus(-z) + (1 - y) * T.nnet.softplus(z)

        grad_W = T.grad(C.sum(), W)

        zero_mean = grad_W - grad_W.mean()

        cov = T.dot(zero_mean.T, zero_mean)

        from theano.sandbox.linalg import matrix_inverse

        inv = matrix_inverse(cov + np.identity(784).astype('float32') * .01)

        self.nat_grad_step = function(
            [X, y, alpha],
            updates={
                self.w: self.w - alpha * T.dot(inv, T.grad(cost, self.w))
            })
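For reference, a rough NumPy sketch (not taken from the source) of the preconditioning idea behind nat_grad_step above: the mean gradient is multiplied by the inverse of the per-example gradient covariance, with a small ridge added for numerical stability.

import numpy as np

def nat_grad_step_np(w, per_example_grads, mean_grad, alpha, ridge=0.01):
    # per_example_grads: (batch, d) gradients; mean_grad: (d,) gradient of the mean cost
    G = per_example_grads - per_example_grads.mean(axis=0)   # zero-mean rows
    cov = G.T.dot(G)                                         # (d, d), unnormalised covariance
    step = np.linalg.solve(cov + ridge * np.eye(w.shape[0]), mean_grad)
    return w - alpha * step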
    def __init__(self, params,correct,Xinfo, samples = 500,batch_size=None):
        ker = kernel()
        mmd = MMD()
        self.samples = samples
        self.params =  params
        self.batch_size=batch_size
        self.Xlabel_value=Xinfo["Xlabel_value"]
        self.Weight_value=Xinfo["Weight_value"]
        
        # file used to save the model
        model_file_name = 'model_MMD_kernel' + '.save'
        # if a previously saved model exists, load it
        try:
            print ('Trying to load model...')
            with open(model_file_name, 'rb') as file_handle:
                obj = pickle.load(file_handle)
                self.f, self.g= obj
                print ('Loaded!')
            return
        except:
            print ('Failed. Creating a new model...')
        
        X,Y,X_test,m,S_b,mu,Sigma_b,Z,eps_NQ,eps_M =\
        T.dmatrices('X','Y','X_test','m','S_b','mu','Sigma_b','Z','eps_NQ','eps_M')
        
        Xlabel=T.dmatrix('Xlabel')
        Zlabel=T.dmatrix('Zlabel')
        
        Zlabel_T=T.exp(Zlabel)/T.sum(T.exp(Zlabel),1)[:,None]  # labels are probabilities: positive and normalized (softmax)
        
        Weight=T.dmatrix('Weight')
        
        lhyp = T.dvector('lhyp')
        ls=T.dvector('ls')
        ga=T.dvector('ga')
        
        (M, D), N, Q = Z.shape, X.shape[0], X.shape[1]

        
        # constrain these variables to be positive
        beta = T.exp(ls)
        gamma=T.exp(ga[0])
        #beta=T.exp(lhyp[0])
        sf2, l = T.exp(lhyp[0]), T.exp(lhyp[1:1+Q])
        
        S=T.exp(S_b)
        #Sigma=T.exp(self.Sigma_b)
        
        # x: the covariance is diagonal, so no Cholesky-style factor is needed
        # u: the covariance is not diagonal, so build a lower-triangular (Cholesky-like) factor
        Sigma = T.tril(Sigma_b - T.diag(T.diag(Sigma_b)) + T.diag(T.exp(T.diag(Sigma_b))))
        
        # scale transformation
        mu_scaled, Sigma_scaled = sf2**0.5 * mu, sf2**0.5 * Sigma
        
        Xtilda = m + S * eps_NQ
        U = mu_scaled+Sigma_scaled.dot(eps_M)

        print ('Setting up cache...')
        
        Kmm = ker.RBF(sf2, l, Z)
        Kmm=mmd.MMD_kenel_Xonly(gamma,Zlabel_T,Kmm,Weight)
        KmmInv = sT.matrix_inverse(Kmm) 
        #KmmDet=theano.sandbox.linalg.det(Kmm)
        
        #KmmInv_cache = sT.matrix_inverse(Kmm)
        #self.fKmm = theano.function([Z, lhyp], Kmm, name='Kmm')
        #self.f_KmmInv = theano.function([Z, lhyp], KmmInv_cache, name='KmmInv_cache')
        # note: this would compile KmmInv_cache as a function of Z and lhyp,
        # i.e. the inverse becomes a function of Z and the hyperparameters
        #self.update_KmmInv_cache()  # evaluate KmmInv with actual numeric values
        # build the derivative functions of the inverse
        
        #self.dKmm_d = {'Z': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), Z), name='dKmm_dZ'),
        #               'lhyp': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), lhyp), name='dKmm_dlhyp')}

        
        print ('Modeling...')
        
        Kmn = ker.RBF(sf2,l,Z,Xtilda)
        Kmn=mmd.MMD_kenel_ZX(gamma,Zlabel_T,Xlabel,Kmn,Weight)
        
        Knn = ker.RBF(sf2,l,Xtilda,Xtilda)
        Knn=mmd.MMD_kenel_Xonly(gamma,Xlabel,Knn,Weight)
        
        Ktilda=Knn-T.dot(Kmn.T,T.dot(KmmInv,Kmn))
        
        Kinterval=T.dot(KmmInv,Kmn)
              
        mean_U=T.dot(Kinterval.T,U)
        betaI=T.diag(T.dot(Xlabel,beta))
        Covariance = betaI       
        
        LL = (self.log_mvn(X, mean_U, Covariance) - 0.5*T.sum(T.dot(betaI,Ktilda)))*correct              
        KL_X = -self.KLD_X(m,S)*correct
        KL_U = -self.KLD_U(mu_scaled , Sigma_scaled , Kmm,KmmInv)
        
        print ('Compiling model ...')        


        inputs = {'X': X, 'Z': Z, 'm': m, 'S_b': S_b, 'mu': mu, 'Sigma_b': Sigma_b, 'lhyp': lhyp, 'ls': ls, 
            'eps_M': eps_M, 'eps_NQ': eps_NQ,'ga':ga,'Zlabel':Zlabel,'Weight':Weight,'Xlabel':Xlabel}
        
        z = 0.0*sum([T.sum(v) for v in inputs.values()]) # solve a bug with derivative wrt inputs not in the graph
        
        self.f = {n: theano.function(list(inputs.values()), f+z, name=n, on_unused_input='ignore')\
                  for n,f in zip(['X', 'U', 'LL', 'KL_U', 'KL_X'], [X, U, LL, KL_U, KL_X])}
        
        
        wrt = {'Z': Z, 'm': m, 'S_b': S_b, 'mu': mu, 'Sigma_b': Sigma_b, 'lhyp': lhyp, 'ls': ls,'ga':ga,'Zlabel':Zlabel}
        self.g = {vn: {gn: theano.function(list(inputs.values()), T.grad(gv+z, vv), name='d'+gn+'_d'+vn, 
            on_unused_input='ignore') for gn,gv in zip(['LL', 'KL_U', 'KL_X'], [LL, KL_U, KL_X])} for vn, vv in wrt.items()}

        with open(model_file_name, 'wb') as file_handle:
            print ('Saving model...')
            sys.setrecursionlimit(10000)
            pickle.dump([self.f, self.g], file_handle, protocol=pickle.HIGHEST_PROTOCOL)
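The `Sigma` construction above maps the unconstrained square matrix `Sigma_b` to a lower-triangular factor whose diagonal is exponentiated, so that `Sigma.dot(Sigma.T)` is always a valid covariance. A minimal NumPy sketch of the same mapping (the function name is ours):

import numpy as np

def tril_factor(Sigma_b):
    # strict lower triangle of Sigma_b plus exp() of its diagonal
    return np.tril(Sigma_b, k=-1) + np.diag(np.exp(np.diag(Sigma_b)))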
Esempio n. 49
0
def invM( M     = Th.dmatrix('M') , **result): 
    return matrix_inverse( Th.identity_like(M)-(M+M.T)/2 )
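invM returns the inverse of the identity minus the symmetric part of M; an assumed NumPy equivalent for reference:

import numpy as np

def invM_np(M):
    # inverse of I - (M + M.T) / 2
    return np.linalg.inv(np.eye(M.shape[0]) - (M + M.T) / 2.0)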
Esempio n. 50
0
def s_variance(K, y, var_y, prior_var, K_new, var_min):
    rK = psd(prior_var * K + var_y * TT.eye(y.shape[0]))
    L = cholesky(rK)
    v = dots(matrix_inverse(L), prior_var * K_new)
    var_x = TT.maximum(prior_var - (v ** 2).sum(axis=0), var_min)
    return var_x
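A NumPy sketch of the same predictive-variance computation (assuming `psd` merely tags the matrix as positive definite and `dots` is a chained dot product); `y` is only used for its length, so it is dropped here:

import numpy as np

def s_variance_np(K, var_y, prior_var, K_new, var_min):
    rK = prior_var * K + var_y * np.eye(K.shape[0])
    L = np.linalg.cholesky(rK)
    v = np.linalg.solve(L, prior_var * K_new)   # equivalent to inv(L).dot(prior_var * K_new)
    return np.maximum(prior_var - (v ** 2).sum(axis=0), var_min)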
Esempio n. 52
0
    def __init__(self,
                 rng,
                 target,
                 input_m,
                 input_S,
                 n_in,
                 n_out,
                 inducing_number,
                 Domain_number,
                 Xlabel,
                 liklihood="Gaussian",
                 Domain_consideration=True,
                 number="1"):

        m = input_m
        S_0 = input_S

        N = m.shape[0]
        D = n_out
        Q = n_in
        M = inducing_number

        #set_initial_value
        ker = kernel(Q)
        mu_value = np.random.randn(M, D) * 1e-2
        Sigma_b_value = np.zeros((M, M))
        Z_value = np.random.randn(M, Q)
        if Domain_consideration:
            ls_value = np.zeros(Domain_number) + np.log(0.1)
        else:
            ls_value = np.zeros(1) + np.log(0.1)

        self.mu = theano.shared(value=mu_value,
                                name='mu' + number,
                                borrow=True)
        self.Sigma_b = theano.shared(value=Sigma_b_value,
                                     name='Sigma_b' + number,
                                     borrow=True)
        self.Z = theano.shared(value=Z_value, name='Z' + number, borrow=True)
        self.ls = theano.shared(value=ls_value,
                                name='ls' + number,
                                borrow=True)

        self.params = [self.mu, self.Sigma_b, self.Z, self.ls]

        self.params.extend(ker.params)

        self.hyp_params_list = [self.mu, self.Sigma_b, self.ls]
        self.Z_params_list = [self.Z]
        self.global_params_list = self.params

        S_1 = T.exp(S_0)
        S = T.sqrt(S_1)

        from theano.tensor.shared_randomstreams import RandomStreams
        srng = RandomStreams(seed=234)
        eps_NQ = srng.normal((N, Q))
        eps_M = srng.normal((M, D))  # the mean and the variance need different random draws, so they are named separately
        eps_ND = srng.normal((N, D))

        beta = T.exp(self.ls)
        # u: the covariance is not diagonal, so build a lower-triangular (Cholesky-like) factor

        Sigma = T.tril(self.Sigma_b - T.diag(T.diag(self.Sigma_b)) +
                       T.diag(T.exp(T.diag(self.Sigma_b))))

        # scale transformation
        mu_scaled, Sigma_scaled = ker.sf2**0.5 * self.mu, ker.sf2**0.5 * Sigma

        Xtilda = m + S * eps_NQ
        self.U = mu_scaled + Sigma_scaled.dot(eps_M)

        Kmm = ker.RBF(self.Z)
        KmmInv = sT.matrix_inverse(Kmm)

        Kmn = ker.RBF(self.Z, Xtilda)

        Knn = ker.RBF(Xtilda)
        Ktilda = Knn - T.dot(Kmn.T, T.dot(KmmInv, Kmn))

        #F = T.dot(Kmn.T,T.dot(KmmInv,self.U)) + T.dot(T.maximum(Ktilda, 1e-16)**0.5,eps_ND)

        Kinterval = T.dot(KmmInv, Kmn)
        A = Kinterval.T
        Sigma_tilda = Ktilda + T.dot(A, T.dot(Sigma_scaled, A.T))
        mean_tilda = T.dot(A, mu_scaled)
        #mean_U=F
        #mean_U=T.dot(Kinterval.T,self.U)
        mean_U = mean_tilda + T.dot(T.maximum(Sigma_tilda, 1e-16)**0.5, eps_ND)
        betaI = T.diag(T.dot(Xlabel, beta))
        Covariance = betaI

        self.output = mean_U

        self.LL = self.log_mvn(
            target, mean_U, Covariance) / N  # - 0.5*T.sum(T.dot(betaI,Ktilda))
        self.KL_X = -self.KLD_X(m, S)
        self.KL_U = -self.KLD_U(mu_scaled, Sigma_scaled, Kmm, KmmInv)
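The `log_mvn` helper called above is not included in this excerpt; a hypothetical NumPy sketch of what a Gaussian log-likelihood with per-row noise precisions (the diagonal of `Covariance`) could look like, under those assumptions:

import numpy as np

def log_mvn_np(X, mean, beta_diag):
    # X, mean: (N, D); beta_diag: (N,) per-row noise precisions (assumed semantics)
    N, D = X.shape
    resid = X - mean
    return 0.5 * (D * np.sum(np.log(beta_diag))
                  - N * D * np.log(2.0 * np.pi)
                  - np.sum(beta_diag[:, None] * resid ** 2))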
Esempio n. 53
0
    def __init__(self, params, sx2 = 1, linear_model = False, samples = 20, use_hat = False):
        ker, self.samples, self.params, self.KmmInv  = kernel(), samples, params, {}
        self.use_hat = use_hat

        model_file_name = 'model' + ('_hat' if use_hat else '') + ('_linear' if linear_model else '') + '.save'

        try:
            print 'Trying to load model...'
            with open(model_file_name, 'rb') as file_handle:
                obj = cPickle.load(file_handle)
                self.f, self.g, self.f_Kmm, self.f_KmmInv, self.dKmm_d = obj
                self.update_KmmInv_cache()
                print 'Loaded!'
            return
        except:
            print 'Failed. Creating a new model...'

        Y, Z, m, ls, mu, lL, eps_MK, eps_NQ, eps_NK, KmmInv = T.dmatrices('Y', 'Z', 'm', 'ls', 'mu', 
            'lL', 'eps_MK', 'eps_NQ', 'eps_NK', 'KmmInv')
        lhyp = T.dvector('lhyp')
        (M, K), N, Q = mu.shape, m.shape[0], Z.shape[1]
        s, sl2, sf2, l = T.exp(ls), T.exp(lhyp[0]), T.exp(lhyp[1]), T.exp(lhyp[2:2+Q])
        L = T.tril(lL - T.diag(T.diag(lL)) + T.diag(T.exp(T.diag(lL))))
        
        print 'Setting up cache...'
        Kmm = ker.RBF(sf2, l, Z) if not linear_model else ker.LIN(sl2, Z)
        KmmInv_cache = sT.matrix_inverse(Kmm)
        self.f_Kmm = theano.function([Z, lhyp], Kmm, name='Kmm')
        self.f_KmmInv = theano.function([Z, lhyp], KmmInv_cache, name='KmmInv_cache')
        self.update_KmmInv_cache()
        self.dKmm_d = {'Z': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), Z), name='dKmm_dZ'),
                       'lhyp': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), lhyp), name='dKmm_dlhyp')}

        print 'Setting up model...'
        if not self.use_hat:
            mu_scaled, L_scaled = sf2**0.5 * mu, sf2**0.5 * L
            X = m + s * eps_NQ
            U = mu_scaled + L_scaled.dot(eps_MK)
            Kmn = ker.RBF(sf2, l, Z, X) if not linear_model else ker.LIN(sl2, Z, X)
            Knn = ker.RBFnn(sf2, l, X) if not linear_model else ker.LINnn(sl2, X)
            A = KmmInv.dot(Kmn)
            B = Knn - T.sum(Kmn * KmmInv.dot(Kmn), 0)
            F = A.T.dot(U) + T.maximum(B, 1e-16)[:,None]**0.5 * eps_NK
            F = T.concatenate((T.zeros((N,1)), F), axis=1)
            S = T.nnet.softmax(F)
            LS = T.sum(T.log(T.maximum(T.sum(Y * S, 1), 1e-16)))
            if not linear_model:
                KL_U = -0.5 * (T.sum(KmmInv.T * T.sum(mu_scaled[:,None,:]*mu_scaled[None,:,:], 2))
                        + K * (T.sum(KmmInv.T * L_scaled.dot(L_scaled.T)) - M - 2.0*T.sum(T.log(T.diag(L_scaled)))
                               + 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm))))))
            else:
                KL_U = 0
            #KL_U = -0.5 * T.sum(T.sum(mu_scaled * KmmInv.dot(mu_scaled), 0) + T.sum(KmmInv * L_scaled.dot(L_scaled.T)) - M
            #                    - 2.0*T.sum(T.log(T.diag(L_scaled))) + 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm))))) if not linear_model else 0
        else:
            # mu_scaled, L_scaled = mu / sf2**0.5, L / sf2**0.5
            mu_scaled, L_scaled = mu / sf2, L / sf2
            X = m + s * eps_NQ
            U = mu_scaled + L_scaled.dot(eps_MK)
            Kmn = ker.RBF(sf2, l, Z, X) if not linear_model else ker.LIN(sl2, Z, X)
            Knn = ker.RBFnn(sf2, l, X) if not linear_model else ker.LINnn(sl2, X)
            B = Knn - T.sum(Kmn * KmmInv.dot(Kmn), 0)
            F = Kmn.T.dot(U) + T.maximum(B, 1e-16)[:,None]**0.5 * eps_NK
            F = T.concatenate((T.zeros((N,1)), F), axis=1)
            S = T.nnet.softmax(F)
            LS = T.sum(T.log(T.maximum(T.sum(Y * S, 1), 1e-16)))
            if not linear_model:
                KL_U = -0.5 * (T.sum(Kmm.T * T.sum(mu_scaled[:,None,:]*mu_scaled[None,:,:], 2))
                        + K * (T.sum(Kmm.T * L_scaled.dot(L_scaled.T)) - M - 2.0*T.sum(T.log(T.diag(L_scaled)))
                               - 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm))))))
            else:
                KL_U = 0

        KL_X_all = -0.5 * T.sum((m**2.0 + s**2.0)/sx2 - 1.0 - 2.0*ls + T.log(sx2), 1)
        KL_X = T.sum(KL_X_all)

        print 'Compiling...'
        inputs = {'Y': Y, 'Z': Z, 'm': m, 'ls': ls, 'mu': mu, 'lL': lL, 'lhyp': lhyp, 'KmmInv': KmmInv, 
            'eps_MK': eps_MK, 'eps_NQ': eps_NQ, 'eps_NK': eps_NK}
        z = 0.0*sum([T.sum(v) for v in inputs.values()]) # solve a bug with derivative wrt inputs not in the graph
        f = zip(['X', 'U', 'S', 'LS', 'KL_U', 'KL_X', 'KL_X_all'], [X, U, S, LS, KL_U, KL_X, KL_X_all])
        self.f = {n: theano.function(inputs.values(), f+z, name=n, on_unused_input='ignore') for n,f in f}
        g = zip(['LS', 'KL_U', 'KL_X'], [LS, KL_U, KL_X])
        wrt = {'Z': Z, 'm': m, 'ls': ls, 'mu': mu, 'lL': lL, 'lhyp': lhyp, 'KmmInv': KmmInv}
        self.g = {vn: {gn: theano.function(inputs.values(), T.grad(gv+z, vv), name='d'+gn+'_d'+vn, 
            on_unused_input='ignore') for gn,gv in g} for vn, vv in wrt.iteritems()}

        with open(model_file_name, 'wb') as file_handle:
            print 'Saving model...'
            sys.setrecursionlimit(2000)
            cPickle.dump([self.f, self.g, self.f_Kmm, self.f_KmmInv, self.dKmm_d], file_handle, protocol=cPickle.HIGHEST_PROTOCOL)
    def __init__(self, params,correct, samples = 500,batch_size=None):
        ker = kernel()
        self.samples = samples
        self.params =  params
        self.batch_size=batch_size
        
        # file used to save the model
        model_file_name = 'model2' + '.save'
        # if a previously saved model exists, load it
        try:
            print ('Trying to load model...')
            with open(model_file_name, 'rb') as file_handle:
                obj = pickle.load(file_handle)
                self.f, self.g= obj
                print ('Loaded!')
            return
        except:
            print ('Failed. Creating a new model...')
        
        X,Y,X_test,mu,Sigma_b,Z,eps_NQ,eps_M =\
        T.dmatrices('X','Y','X_test','mu','Sigma_b','Z','eps_NQ','eps_M')
        
        Wx, Ws, Wu=\
        T.dmatrices('Wx', 'Ws', 'Wu')

        bx, bs, bu=\
        T.dvectors('bx', 'bs', 'bu')

        gamma_x,beta_x,gamma_u,beta_u,gamma_s,beta_s=\
        T.dvectors("gamma_x","beta_x","gamma_u","beta_u","gamma_s","beta_s")
    
        lhyp = T.dvector('lhyp')
        ls=T.dvector('ls')
        
        (M, D), N, Q = Z.shape, X.shape[0], X.shape[1]

        
        # constrain these variables to be positive
        beta = T.exp(ls[0])
        #beta=T.exp(lhyp[0])
        sf2, l = T.exp(lhyp[0]), T.exp(lhyp[1:1+Q])
        
        #Sigma=T.exp(self.Sigma_b)
        
        # x: the covariance is diagonal, so no Cholesky-style factor is needed
        # u: the covariance is not diagonal, so build a lower-triangular (Cholesky-like) factor
        Sigma = T.tril(Sigma_b - T.diag(T.diag(Sigma_b)) + T.diag(T.exp(T.diag(Sigma_b))))
        
        # scale transformation
        mu_scaled, Sigma_scaled = sf2**0.5 * mu, sf2**0.5 * Sigma
        
        # build the hidden layers
        out1=self.neural_net_predict(Wx,bx,gamma_x,beta_x,X)
        m=self.neural_net_predict(Wu,bu,gamma_u,beta_u,out1)
        S=self.neural_net_predict(Ws,bs,gamma_s,beta_s,out1)
        #outputs1 = T.dot(X,Wx) + bx
        #m = T.dot(out1,Wu) + bu
        #S=T.dot(out1,Ws) + bs
                 
        S=T.exp(S)
        S=T.sqrt(S)
        
        Xtilda = m+S*eps_NQ
        U = mu_scaled+Sigma_scaled.dot(eps_M)

        print ('Setting up cache...')
        
        Kmm = ker.RBF(sf2, l, Z)
        KmmInv = sT.matrix_inverse(Kmm) 
        #KmmDet=theano.sandbox.linalg.det(Kmm)
        
        #KmmInv_cache = sT.matrix_inverse(Kmm)
        #self.fKmm = theano.function([Z, lhyp], Kmm, name='Kmm')
        #self.f_KmmInv = theano.function([Z, lhyp], KmmInv_cache, name='KmmInv_cache')
        # note: this would compile KmmInv_cache as a function of Z and lhyp,
        # i.e. the inverse becomes a function of Z and the hyperparameters
        #self.update_KmmInv_cache()  # evaluate KmmInv with actual numeric values
        # build the derivative functions of the inverse
        
        #self.dKmm_d = {'Z': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), Z), name='dKmm_dZ'),
        #               'lhyp': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), lhyp), name='dKmm_dlhyp')}

        
        print ('Modeling...')
        
        Kmn = ker.RBF(sf2,l,Z,Xtilda)
        Knn = ker.RBF(sf2,l,Xtilda,Xtilda)
        
        Ktilda=Knn-T.dot(Kmn.T,T.dot(KmmInv,Kmn))
        
        Kinterval=T.dot(KmmInv,Kmn)
              
        mean_U=T.dot(Kinterval.T,U)
        Covariance = beta       
        
        LL = (self.log_mvn(X, mean_U, Covariance) - 0.5*beta*T.sum((T.eye(N)*Ktilda)))*correct      
        KL_X = -self.KLD_X(m,S)*correct
        KL_U = -self.KLD_U(mu_scaled , Sigma_scaled , Kmm,KmmInv)
        
        print ('Compiling model ...')        

        inputs = {'X': X, 'Z': Z,'mu': mu, 'Sigma_b': Sigma_b, 'lhyp': lhyp, 'ls': ls, 'eps_M': eps_M, 'eps_NQ': eps_NQ,\
                  "Wx":Wx, "bx":bx, "Wu":Wu,"bu":bu, "Ws":Ws, "bs":bs,\
              "gamma_x":gamma_x,"beta_x":beta_x,"gamma_u":gamma_u,"beta_u":beta_u,"gamma_s":gamma_s,"beta_s":beta_s}
        
        z = 0.0*sum([T.sum(v) for v in inputs.values()]) # solve a bug with derivative wrt inputs not in the graph
        
        self.f = {n: theano.function(list(inputs.values()), f+z, name=n, on_unused_input='ignore')\
                  for n,f in zip(['Xtilda','U', 'LL', 'KL_U', 'KL_X'], [Xtilda,U, LL, KL_U, KL_X])}
        
        
        wrt = {'Z': Z,'mu': mu, 'Sigma_b': Sigma_b, 'lhyp': lhyp, 'ls': ls, "Wx":Wx, "bx":bx, "Wu":Wu,"bu":bu, "Ws":Ws, "bs":bs,\
              "gamma_x":gamma_x,"beta_x":beta_x,"gamma_u":gamma_u,"beta_u":beta_u,"gamma_s":gamma_s,"beta_s":beta_s}
        self.g = {vn: {gn: theano.function(list(inputs.values()), T.grad(gv+z, vv), name='d'+gn+'_d'+vn, 
            on_unused_input='ignore') for gn,gv in zip(['LL', 'KL_U', 'KL_X'], [LL, KL_U, KL_X])} for vn, vv in wrt.items()}

        with open(model_file_name, 'wb') as file_handle:
            print ('Saving model...')
            sys.setrecursionlimit(2000)
            pickle.dump([self.f, self.g], file_handle, protocol=pickle.HIGHEST_PROTOCOL)
Esempio n. 55
0
    def __init__(self,D, M,Q,Domain_number):
        
        self.Xlabel=T.matrix('Xlabel')

        
        self.X=T.matrix('X')
        N=self.X.shape[0]
        
        self.Weight=T.matrix('Weight')

        ker=kernel(Q)
        mmd=MMD(M,Domain_number)
        
        mu_value = np.random.randn(M,D)
        Sigma_b_value = np.zeros((M,M)) + np.log(0.01)

        Z_value = np.random.randn(M,Q)
        self.test=Z_value
        ls_value=np.zeros(Domain_number)+np.log(0.1)
        
        self.mu = theano.shared(value=mu_value, name='mu', borrow=True)
        self.Sigma_b = theano.shared(value=Sigma_b_value, name='Sigma_b', borrow=True)
        self.Z = theano.shared(value=Z_value, name='Z', borrow=True)
        self.ls = theano.shared(value=ls_value, name='ls', borrow=True)
        
        self.params = [self.mu,self.Sigma_b,self.Z,self.ls]
        
        self.hiddenLayer_x = HiddenLayer(rng=rng,input=self.X,n_in=D,n_out=20,activation=T.nnet.relu,number='_x')
        self.hiddenLayer_m = HiddenLayer(rng=rng,input=self.hiddenLayer_x.output,n_in=20,n_out=Q,activation=T.nnet.relu,number='_m')
        self.hiddenLayer_S = HiddenLayer(rng=rng,input=self.hiddenLayer_x.output,n_in=20,n_out=Q,activation=T.nnet.relu,number='_S')
        
        self.loc_params= []
        self.loc_params.extend(self.hiddenLayer_x.params)
        self.loc_params.extend(self.hiddenLayer_m.params)
        self.loc_params.extend(self.hiddenLayer_S.params)

        self.local_params={}
        for i in self.loc_params:
            self.local_params[str(i)]=i
        
        self.params.extend(ker.params)
        self.params.extend(mmd.params)
        
        self.global_params={}
        for i in self.params:
            self.global_params[str(i)]=i
        
        self.params.extend(self.hiddenLayer_x.params)
        self.params.extend(self.hiddenLayer_m.params)
        self.params.extend(self.hiddenLayer_S.params)
        
        self.wrt={}
        for i in self.params:
            self.wrt[str(i)]=i
        
        m=self.hiddenLayer_m.output
        S_0=self.hiddenLayer_S.output
        S_1=T.exp(S_0)
        S=T.sqrt(S_1)
        
        from theano.tensor.shared_randomstreams import RandomStreams
        srng = RandomStreams(seed=234)
        eps_NQ = srng.normal((N,Q))
        eps_M= srng.normal((M,D))  # the mean and the variance need different random draws, so they are named separately

        beta = T.exp(self.ls)
        # u: the covariance is not diagonal, so build a lower-triangular (Cholesky-like) factor

        Sigma = T.tril(self.Sigma_b - T.diag(T.diag(self.Sigma_b)) + T.diag(T.exp(T.diag(self.Sigma_b))))
        
        # scale transformation
        mu_scaled, Sigma_scaled = ker.sf2**0.5 * self.mu, ker.sf2**0.5 * Sigma
        
        Xtilda = m + S * eps_NQ
        self.U = mu_scaled+Sigma_scaled.dot(eps_M)
        
        Kmm = ker.RBF(self.Z)
        Kmm=mmd.MMD_kenel_Xonly(mmd.Zlabel_T,Kmm,self.Weight)
        KmmInv = sT.matrix_inverse(Kmm) 
        
        Kmn = ker.RBF(self.Z,Xtilda)
        Kmn=mmd.MMD_kenel_ZX(self.Xlabel,Kmn,self.Weight)
        
        Knn = ker.RBF(Xtilda)
        Knn=mmd.MMD_kenel_Xonly(self.Xlabel,Knn,self.Weight)
        
        Ktilda=Knn-T.dot(Kmn.T,T.dot(KmmInv,Kmn))
        
        Kinterval=T.dot(KmmInv,Kmn)
              
        mean_U=T.dot(Kinterval.T,self.U)
        betaI=T.diag(T.dot(self.Xlabel,beta))
        Covariance = betaI       
        
        self.LL = (self.log_mvn(self.X, mean_U, Covariance) - 0.5*T.sum(T.dot(betaI,Ktilda)))            
        self.KL_X = -self.KLD_X(m,S)
        self.KL_U = -self.KLD_U(mu_scaled , Sigma_scaled , Kmm,KmmInv)
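The `KLD_U` helper used throughout these examples is likewise not shown; a hedged sketch of the standard KL divergence between the variational posterior N(mu_d, Sigma Sigma^T) (shared covariance across the D columns of mu) and the GP prior N(0, Kmm), which is what such a term typically evaluates (the exact form used by the author is an assumption):

import numpy as np

def KLD_U_np(mu, Sigma, Kmm, KmmInv):
    # mu: (M, D); Sigma: (M, M) lower-triangular factor, so the covariance is Sigma.dot(Sigma.T)
    M, D = mu.shape
    logdet_Kmm = 2.0 * np.sum(np.log(np.diag(np.linalg.cholesky(Kmm))))
    logdet_S = 2.0 * np.sum(np.log(np.diag(Sigma)))
    quad = np.sum(mu * KmmInv.dot(mu))              # sum_d mu_d^T Kmm^{-1} mu_d
    trace = np.trace(KmmInv.dot(Sigma).dot(Sigma.T))
    return 0.5 * (quad + D * (trace - M + logdet_Kmm - logdet_S))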