Example #1
    def _build_cross_ent(self, weights, means, covars, kernel_chol):
        cross_ent = 0.0
        for i in range(self.num_components):
            sum_val = 0.0
            for j in range(self.num_latent):
                if self.diag_post:
                    # TODO(karl): this is a bit inefficient since we're not making use of the fact
                    # that covars is diagonal. A solution most likely involves a custom tf op.
                    trace = tf.trace(
                        tf.cholesky_solve(kernel_chol[j, :, :],
                                          tf.diag(covars[i, j, :])))
                else:
                    trace = tf.reduce_sum(
                        util.diag_mul(
                            tf.cholesky_solve(kernel_chol[j, :, :],
                                              covars[i, j, :, :]),
                            tf.transpose(covars[i, j, :, :])))

                sum_val += (util.CholNormal(
                    means[i, j, :], kernel_chol[j, :, :]).log_prob(0.0) -
                            0.5 * trace)

            cross_ent += weights[i] * sum_val

        return cross_ent
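
What the inner loop accumulates is the expected log prior density under each variational factor. The following identity (standard Gaussian algebra, stated here for reference rather than quoted from the source) is what `CholNormal(...).log_prob(0.0) - 0.5 * trace` evaluates, with K^{-1}S formed by `tf.cholesky_solve` from the Cholesky factor of K:

    \mathbb{E}_{\mathcal{N}(u;\,m_{ij},\,S_{ij})}\big[\log \mathcal{N}(u;\,0,\,K_j)\big]
        = \log \mathcal{N}(m_{ij};\,0,\,K_j) - \tfrac{1}{2}\operatorname{tr}\!\big(K_j^{-1} S_{ij}\big)
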
Example #2
 def cholesky_solve(chol, rhs, name=None):
     """Broadcasting batch cholesky solve."""
     try:
         return tf.cholesky_solve(chol, rhs, name=name)
     except ValueError:
         chol, rhs = tf_utils.broadcast_outer_dims((chol, 2), (rhs, 2))
         return tf.cholesky_solve(chol, rhs, name=name)
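
The `tf_utils.broadcast_outer_dims` helper above is project-specific; its signature is assumed only from this call site. As a rough NumPy sketch (not the library code) of what a broadcasting batch Cholesky solve has to do, the leading batch dimensions of the factor and the right-hand side are aligned before solving:

import numpy as np

def batched_cholesky_solve(chol, rhs):
    # Solve A x = rhs with A = chol @ chol.T, broadcasting leading batch dims.
    batch = np.broadcast_shapes(chol.shape[:-2], rhs.shape[:-2])  # NumPy >= 1.20
    chol = np.broadcast_to(chol, batch + chol.shape[-2:])
    rhs = np.broadcast_to(rhs, batch + rhs.shape[-2:])
    # Solve L z = rhs, then L^T x = z (np.linalg.solve does not exploit the
    # triangular structure; this sketch only illustrates the shape handling).
    z = np.linalg.solve(chol, rhs)
    return np.linalg.solve(np.swapaxes(chol, -1, -2), z)
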
Example #3
    def calculate_factorizations(self):
        batched_eye = tf.eye(self.num_induced_points,
                             batch_shape=[self.num_outputs],
                             dtype=float_type)
        # TODO: Change 1e-6 to the respective constant of GPflow
        Kmm = self.K(self.Z) + 1e-6 * batched_eye
        Kmn = self.K(self.Z, self.X)
        L = tf.cholesky(Kmm)
        V = tf.matrix_triangular_solve(L, Kmn)
        G = self.variance[:, None] - tf.reduce_sum(tf.square(V), axis=[1])
        G = tf.sqrt(1.0 + G / self.noise[:, None])
        V = V / G[:, None]
        Am = tf.cholesky(tf.matmul(V, V, transpose_b=True) + \
                         self.noise[:, None, None] * batched_eye)
        At = tf.matmul(L, Am)
        iAt = tf.matrix_triangular_solve(At, batched_eye)
        Y_ = tf.transpose(self.Y)[:, :, None]
        beta = tf.matrix_triangular_solve(L,
                                          tf.cholesky_solve(
                                              Am, (V / G[:, None]) @ Y_),
                                          adjoint=True)[:, :, 0]
        iB = tf.matmul(iAt, iAt, transpose_a=True) * self.noise[:, None, None]
        iK = tf.cholesky_solve(L, batched_eye) - iB

        return iK, beta
Example #4
def gauss_kl(min_q_mu, q_sq,K):
    q_mu=-1*min_q_mu

    #q_sqrt=tf.cholesky(tf.squeeze(q_sqrt))
        # K is a variance...we sqrt later
    '''
    N=1
    Q=5
    q_mu=tf.random_normal([Q,1],dtype=tf.float64)
    q_var=tf.random_normal([Q,Q],dtype=tf.float64)
    q_var=q_var+tf.transpose(q_var [1,0])+1e+1*np.eye(Q)
    K=q_var
    q_sqrt=tf.cholesky(q_var)
    q_sqrt=tf.expand_dims(q_sqrt,-1)
    num_latent=1
    s=tf.Session()
    s.run(tf.initialize_all_variables())
    '''
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    We assume num_latent independent distributions, given by the columns of
    q_mu and the last dimension of q_sqrt.

    q_mu is a matrix, each column contains a mean.

    q_sqrt is a 3D tensor, each matrix within is a lower triangular square-root
        matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.

    num_latent is an integer: the number of independent distributions (equal to
        the columns of q_mu and the last dim of q_sqrt).

    q_sqrt=tf.cholesky(K)
    L = tf.cholesky(q_sq)
    alpha = tf.matrix_triangular_solve(L, q_mu, lower=True)
    KL = 0.5 * tf.reduce_sum(tf.square(alpha))  # Mahalanobis term.
    KL +=   0.5 * tf.reduce_sum(
        tf.log(tf.square(tf.diag_part(L))))  # Prior log-det term.
    KL += -0.5 * tf.cast(tf.shape(q_sqrt)[0], tf.float64)

    Lq = tf.batch_matrix_band_part(q_sqrt, -1, 0)
    # Log determinant of q covariance:
    KL += -0.5*tf.reduce_sum(tf.log(tf.square(tf.diag_part(Lq))))
    LiLq = tf.matrix_triangular_solve(L, Lq, lower=True)
    KL += 0.5 * tf.reduce_sum(tf.square(LiLq))  # Trace term
    """
    V2 = tf.cholesky(K)     # Cholesky factor of the prior covariance K
    V1 = tf.cholesky(q_sq)  # Cholesky factor of the variational covariance q_sq
    KL = h.Mul(tf.transpose(q_mu), tf.cholesky_solve(V2, q_mu))  # Mahalanobis term q_mu' K^-1 q_mu
    KL += tf.trace(tf.cholesky_solve(V2, q_sq))                  # trace term tr(K^-1 q_sq)
    KL -= h.get_dim(K, 0)                                        # minus the dimensionality
    KL += tf.reduce_sum(2*tf.log(tf.diag_part(V2)) - 2*tf.log(tf.diag_part(V1)))  # log-det ratio
    return KL/2
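
The live tail of gauss_kl implements the standard closed form for the KL divergence between two multivariate Gaussians (a textbook identity, restated here rather than quoted from this repository), with K^{-1}q_mu and K^{-1}q_sq obtained via `tf.cholesky_solve(V2, ...)` and the log-determinants read off the Cholesky diagonals:

    \mathrm{KL}\big(\mathcal{N}(q_\mu, S)\,\|\,\mathcal{N}(0, K)\big)
        = \tfrac{1}{2}\Big[\, q_\mu^\top K^{-1} q_\mu + \operatorname{tr}\!\big(K^{-1} S\big)
          - d + \log\det K - \log\det S \,\Big],
    \qquad \log\det K = 2\sum_i \log (V_2)_{ii}
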
Example #5
def F_bound2_v2(y,S,Kmm,Knm,Kmnnm,Tr_Knn,sigma):
    #matrices to be used
    N=get_dim(y,0)
    Kmm_chol=tf.cholesky(Kmm)
    Q_nn=tf.square(sigma)*np.eye(N)+Mul(Knm,tf.cholesky_solve(Kmm_chol,tf.transpose(Knm)))
    bound=-0.5*(Tr_Knn-tf.trace(tf.cholesky_solve(Kmm_chol,Kmnnm)))/tf.square(sigma)
    bound+=multivariate_normal(y, tf.zeros([N,1],dtype=tf.float32), tf.cholesky(Q_nn))
    return bound
Example #6
 def calculate_factorizations(self):
     K = self.K(self.X)     # self.K: kernel function
     batched_eye = tf.eye(tf.shape(self.X)[0], batch_shape=[self.num_outputs], dtype=float_type)  # a batch of num_outputs identity matrices, each of shape [X.shape[0], X.shape[0]]
     L = tf.cholesky(K + self.noise[:, None, None]*batched_eye)   # Cholesky decomposition
     iK = tf.cholesky_solve(L, batched_eye)  # K @ iK = batched_eye
     Y_ = tf.transpose(self.Y)[:, :, None]
     # Why do we transpose Y? Maybe we need to change the definition of self.Y() or beta?
     beta = tf.cholesky_solve(L, Y_)[:, :, 0]  # K @ beta = Y_
     return iK, beta
Example #7
 def calculate_factorizations(self):
     K = self.K(self.X)
     batched_eye = tf.eye(tf.shape(self.X)[0], batch_shape=[self.num_outputs], dtype=float_type)
     L = tf.cholesky(K + self.noise[:, None, None]*batched_eye)
     iK = tf.cholesky_solve(L, batched_eye)
     Y_ = tf.transpose(self.Y)[:, :, None]
     # Why do we transpose Y? Maybe we need to change the definition of self.Y() or beta?
     beta = tf.cholesky_solve(L, Y_)[:, :, 0] # beta from paper
     return iK, beta
Example #8
    def _build_cross_ent(self, weights, means, covars, link_covars,
                         kernel_chol, kernlink_chol):
        cross_ent = 0.0
        for i in range(self.num_components):
            sum_val = 0.0
            for r in range(self.num_block):
                dim_block = len(self.block_struct[r])
                # construct Khh^-1
                if dim_block == 1:
                    # convert float dummy==1.0 to rank 2 tensor
                    Khh_inv = tf.expand_dims(
                        tf.expand_dims(kernlink_chol[r], 0), 1)
                    log_det = util.log_cholesky_det(kernel_chol[r])
                else:
                    Khh_inv = tf.cholesky_solve(kernlink_chol[r],
                                                tf.eye(dim_block))
                    # construct ln|Kr_uu|
                    log_det = self.num_inducing * util.log_cholesky_det(kernlink_chol[r]) + \
                                dim_block * util.log_cholesky_det(kernel_chol[r])

                # calculate m_r'(Kuu^-1)m_r
                means_r = [
                    tf.expand_dims(means[i, j, :], 1)
                    for j in self.block_struct[r]
                ]
                quad_form = 0.0
                for j in range(dim_block):
                    sum_means = tf.add_n(
                        [Khh_inv[j, h] * means_r[h] for h in range(dim_block)])
                    quad_form += tf.reduce_sum(
                        means_r[j] *
                        tf.cholesky_solve(kernel_chol[r], sum_means))

                # calculate trace[(Kuu^-1)Sk_r]
                if self.diag_post:
                    # where Sk_r diagonal, trace reduces to sum of diagonal inner products over j in block r,
                    # scaled by Khh_inv[j,j]
                    diag_inv = tf.diag_part(
                        tf.cholesky_solve(kernel_chol[r],
                                          tf.eye(self.num_inducing)))
                    cov_diag = [covars[i, j, :] for j in self.block_struct[r]]
                    trace = tf.reduce_sum(diag_inv * tf.add_n([
                        Khh_inv[j, j] * cov_diag[j] for j in range(dim_block)
                    ]))

                else:
                    trace = tf.trace(tf.matmul(Khh_inv, tf.matmul(link_covars[i][r], link_covars[i][r], transpose_b=True))) * \
                            tf.trace(tf.matmul(tf.cholesky_solve(kernel_chol[r], covars[i, r, :, :]), covars[i, r, :, :],
                                transpose_b=True))

                sum_val += dim_block * self.num_inducing * tf.log(
                    2.0 * np.pi) + log_det + quad_form + trace

            cross_ent += -0.5 * weights[i] * sum_val

        return cross_ent
Example #9
    def call(self, v):
        if self.constrained:
            z = tf.cholesky_solve(self.chol, self.h + v)
            y = tf.cholesky_solve(self.chol_constraint,
                                  tf.matmul(self.C, z) - self.d)
            return tf.cholesky_solve(
                self.chol, self.h + v - tf.matmul(self.C, y, transpose_a=True))

        else:
            return tf.cholesky_solve(self.chol, self.h + v)
Example #10
    def calculate_factorizations(self):
        K = self.K(self.X)
        batched_eye = tf.eye(tf.shape(self.X)[0],
                             batch_shape=[self.num_outputs],
                             dtype=float_type)
        L = tf.cholesky(K + self.noise[:, None, None] * batched_eye)
        iK = tf.cholesky_solve(L, batched_eye)
        Y_ = tf.transpose(self.Y)[:, :, None]

        beta = tf.cholesky_solve(L, Y_)[:, :, 0]
        return iK, beta
Example #11
def myopicController_noBdiff(X_est,PI_est,Control,gamma,true_model_est,
                              true_model_est_null,target_model_est,xdim,udim):
    #graphs for updating state and observation, but B is not differentiable with respect to state
    
    #true_model_est: state est. gradient, controlled dynamics, must depend upon X_plus, Control
    #true_model_est_null: state est. gradient, null control, must depend upon X_plus
    #target_model_est: state est. target dynamics, must depend upon X_plus

    #control coupling matrix, evaluated at state estimate, NOT TRUE STATE
    B = grad_elemwise(true_model_est,Control)

    #first expected term E(B^T B) + gamma I
    #gamma = 1e-4 #regularization term
    #(B^T B) + gamma I
    exp1_1 = tf.matmul(tf.transpose(B),B)+gamma*np.eye(xdim,xdim)

    #B^T* (f-g)
    exp2_1 = mvMul(tf.transpose(B),tf.squeeze(true_model_est_null-target_model_est))
    #0.25* B^T* Tr_{2,3}([f''-g'']Sigma)
    Pistack3 = tf.stack([PI_est,PI_est])
    fdp = hess_elemwise(true_model_est,X_est)
    gdp = hess_elemwise(target_model_est,X_est)
    exp2_2 = 0.25*mvMul(tf.transpose(B),tf.trace(tf.matmul((fdp-gdp),Pistack3)))
    
    exp1_approx_meanonly = exp1_1
    exp2_approx_meanonly = exp2_1+exp2_2
    #Control_new = -1.0*mvMul(tf.matrix_inverse(exp1_approx_meanonly),exp2_approx_meanonly)    
    #avoid matrix inversion
    #Control_new = tf.squeeze(
    #    tf.matrix_solve(exp1_approx_meanonly,-1.0*tf.expand_dims(exp2_approx_meanonly,1)))
    Control_new = tf.squeeze(
        tf.cholesky_solve(tf.cholesky(exp1_approx_meanonly),-1.0*tf.expand_dims(exp2_approx_meanonly,1)))

    
    return Control_new
Example #12
def myopicController_meanonly(X_est,PI_est,Control,gamma,true_model_est,
                              true_model_est_null,target_model_est,xdim,udim):
    #graphs for updating state and observation
    #true_model_est: state est. gradient, controlled dynamics, must depend upon X_plus, Control
    #true_model_est_null: state est. gradient, null control, must depend upon X_plus
    #target_model_est: state est. target dynamics, must depend upon X_plus

    #control coupling matrix, evaluated at state estimate, NOT TRUE STATE
    B = grad_elemwise(true_model_est,Control)

    #first expected term E(B^T B) + gamma I
    #gamma = 1e-4 #regularization term
    #(B^T B) + gamma I
    exp1_1 = tf.matmul(tf.transpose(B),B)+gamma*np.eye(xdim,xdim)

    #B'^T* (f-g)
    exp2_1 = mvMul(tf.transpose(B),tf.squeeze(true_model_est_null-target_model_est))
    
    exp1_approx_meanonly = exp1_1
    exp2_approx_meanonly = exp2_1
    #Control_new = -1.0*mvMul(tf.matrix_inverse(exp1_approx_meanonly),exp2_approx_meanonly)    
    #avoid matrix inversion
    #Control_new = tf.squeeze(
    #    tf.matrix_solve(exp1_approx_meanonly,-1.0*tf.expand_dims(exp2_approx_meanonly,1)))
    Control_new = tf.squeeze(
        tf.cholesky_solve(tf.cholesky(exp1_approx_meanonly),-1.0*tf.expand_dims(exp2_approx_meanonly,1)))

    
    return Control_new
Example #13
    def nlml(self,Xu,Xf,Yu1, Yu2, Yu3, Yf,dt, hyp1, hyp3, hyp5, sig_n, lambda1, lambda2, un_u, un_f, kernel_type, jitter=1.0e-10): # negative log marginal likelihood

#        sess1 = tf.Session()
#        sess1.run(tf.global_variables_initializer())


#        xf_train = np.linspace(-8.0,8.0,self.Nf+2)[1:-1].reshape((-1,1))
#        
#        
#        yf_train = self.u_exact(xf_train,0.0, u_exa, t_exa, x_exa, 2)
#        #yf_train = yf_train+np.linalg.cholesky(previous_cov_mat[:Nf,:Nf])@ np.random.randn(Nf,1)
#        
#        xu_train = np.array([[-8.0], [8.0]],dtype=np.float64)
#        
#        Nu = xu_train.shape[0]
#        Nf = xf_train.shape[0]        
#
#
#        
#        un_u = self.u_exact(xu_train,init_time,u_exa, t_exa, x_exa, 1)
#        un_f = yf_train

        N = 3*(self.Nu + self.Nf)
        self.K0 = self.kernel_uf_train(Xu,Xf,self.Nu, self.Nf, hyp1,hyp3,hyp5,self.a, self.b, self.c, lambda1, lambda2, un_u, un_f, dt)
#        self.K0 = self.kernel_uf_train(xu_train,xf_train,self.Nu, self.Nf, hyp1,hyp3,hyp5,self.a, self.b, self.c, lambda1, lambda2, un_u, un_f, dt)

        K = self.K0 + (sig_n**2+jitter)*tf.eye(N,dtype=tf.float64)
         
        self.L = tf.cholesky(K)
        r = tf.concat((Yu1,Yu2,Yu3,Yf,Yf,Yf),axis=0)
        self.alpha = tf.cholesky_solve(self.L, r)
        self.sig2_tf = tf.matmul(r, self.alpha, transpose_a=True)/N
        return 0.5 * N * tf.log(2.0*np.pi*self.sig2_tf)\
                +tf.reduce_sum(tf.log(tf.diag_part(self.L))) \
                + N/2.0    
Example #14
    def KL(self):
        """
        The KL divergence from the variational distribution to the prior

        :return: KL divergence from N(q_mu, q_sqrt) to N(0, I), independently for each GP
        """

        self.build_cholesky_if_needed()

        KL = -0.5 * self.num_inducing * self.num_nodes * self.dim_per_out

        for nd in range(self.num_nodes):
            q_sqrt_nd = self.q_sqrt_lst[nd]
            with params_as_tensors_for(q_sqrt_nd, convert=True):
                KL -= 0.5 * tf.reduce_sum(
                    tf.log(tf.matrix_diag_part(q_sqrt_nd)**2))

                KL += tf.reduce_sum(tf.log(tf.matrix_diag_part(
                    self.Lu[nd]))) * self.dim_per_out
                KL += 0.5 * tf.reduce_sum(
                    tf.square(
                        tf.matrix_triangular_solve(
                            self.Lu_tiled_lst[nd], q_sqrt_nd, lower=True)))
                q_mu_nd = self.q_mu[:, nd * self.dim_per_out:(nd + 1) *
                                    self.dim_per_out]
                Kinv_m_nd = tf.cholesky_solve(self.Lu[nd], q_mu_nd)
                KL += 0.5 * tf.reduce_sum(q_mu_nd * Kinv_m_nd)

        return KL
Example #15
    def _build_interim_vals(self, kernel_chol, inducing_inputs, train_inputs):
        """Helper function for `_build_ell`

        Args:
            kernel_chol: Tensor(num_latents, num_inducing, num_inducing)
            inducing_inputs: Tensor(num_latents, num_inducing, input_dim)
            train_inputs: Tensor(batch_size, input_dim)
        Returns:
            `kern_prods` (num_latents, batch_size, num_inducing)
            and `kern_sums` (num_latents, batch_size)
        """
        # shape of ind_train_kern: (num_latents, num_inducing, batch_size)

        kern_prods = [0.0 for _ in range(self.num_latents)]
        kern_sums = [0.0 for _ in range(self.num_latents)]

        for i in range(self.num_latents):
            ind_train_kern = self.cov[i].cov_func(inducing_inputs[i, :, :],
                                                  train_inputs)
            # Compute A = Kxz.Kzz^(-1) = (Kzz^(-1).Kzx)^T.
            kern_prods[i] = tf.transpose(
                tf.cholesky_solve(kernel_chol[i, :, :], ind_train_kern))
            # We only need the diagonal components.
            kern_sums[i] = (self.cov[i].diag_cov_func(train_inputs) -
                            util.mul_sum(kern_prods[i],
                                         tf.matrix_transpose(ind_train_kern)))

        kern_prods = tf.stack(kern_prods, 0)
        kern_sums = tf.stack(kern_sums, 0)

        return kern_prods, kern_sums
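
Reading the loop body off directly (this summary is not part of the quoted docstring), the two returned quantities per latent process are

    A_i = K_{xz} K_{zz}^{-1} \quad\text{(computed as } (K_{zz}^{-1} K_{zx})^\top \text{ via tf.cholesky\_solve)},
    \qquad
    \mathrm{kern\_sums}_i = \operatorname{diag}\!\big(K_{xx} - K_{xz} K_{zz}^{-1} K_{zx}\big),

i.e. the projection coefficients onto the inducing points and the diagonal of the Nystroem-corrected covariance.
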
Example #16
def mlpg_univariate(means, stds, weights):
    """Generate a trajectory out of a time sequence of gaussian parameters.

    The algorithm used is taken from Tokuda, K. et alii (2000). Speech
    Parameter Generation Algorithms for HMM-based speech synthesis. It
    aims at generating the most likely trajectory sequence based on
    gaussian parameters fitted to an input sequence of some kind.

    means   : time sequence of means (1-D tensor)
    stds    : time sequence of standard deviations (1-D tensor)
    weights : matrix of weights to derive successive orders
              of dynamic features out of static ones (2-D tensor)

    The means and standard deviations should consist of the time
    sequence of parameters for static features first, followed by the
    time sequence of delta features parameters and finally by that of
    delta delta features parameters.
    """
    # Test arguments' rank validity.
    tf.control_dependencies([
        tf.assert_rank(means, 1),
        tf.assert_rank(stds, 1),
        tf.assert_rank(weights, 2)
    ])
    # Compute the terms of the parameters generation system.
    inv_stds = tf.matrix_diag(1 / (tf.square(stds) + 1e-30))
    timed_variance = tf.matmul(tf.matrix_transpose(weights), inv_stds)
    left_term = tf.matmul(timed_variance, weights)
    right_term = tf.matmul(timed_variance, tf.expand_dims(means, 1))
    # Solve the system using cholesky decomposition.
    static_features = tf.cholesky_solve(tf.cholesky(left_term), right_term)
    # Add dynamic features to the predicted static ones and return them.
    return tf.matmul(weights, static_features)
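
The system being solved is the MLPG normal equation (the standard form of the Tokuda et al. algorithm, restated here rather than copied from the function): with Sigma = diag(sigma^2) the per-frame variances and W the delta/delta-delta weight matrix,

    \big(W^\top \Sigma^{-1} W\big)\, c \;=\; W^\top \Sigma^{-1} \mu,

which `tf.cholesky_solve(tf.cholesky(left_term), right_term)` solves for the static trajectory c; the returned value W c appends the dynamic features.
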
Example #17
    def create_prediction(tf_input):

        t_AInv = tf.cholesky_solve(t_L_aa, tf.eye(t_M, dtype=dtype))

        t_PredictAlpha = (1.0 / t_beta) * tf.matrix_triangular_solve(
            t_L_uu, tf.matmul(t_AInv, t_GammaT), lower=True, adjoint=True)

        t_K_x_Z = kernel.covar_matrix(tf_input, t_Z)

        t_y_mean = t_beta * tf.matmul(t_K_x_Z, t_PredictAlpha)

        t_K_x_x_diag = kernel.covar_diag(tf_input)

        t_L_uuInv_K_Z_x = tf.matrix_triangular_solve(t_L_uu,
                                                     tf.transpose(t_K_x_Z),
                                                     lower=True)

        t_G = (1.0 / t_beta) * t_AInv - tf.eye(t_M, dtype=dtype)

        t_y_var = t_K_x_x_diag \
                  + tf.reduce_sum(t_L_uuInv_K_Z_x * tf.matmul(t_G, t_L_uuInv_K_Z_x), axis=0) \
                  + (1.0 / t_beta) * tf.ones([tf.shape(tf_input)[0]], dtype=dtype)

        t_y_var = t_y_var[:, tf.newaxis]

        return t_y_mean, t_y_var
Example #18
 def add_pair_lamd(self, static, moving, n_neighbors):
     lfd1 = 1
     lfd2 = 0.05
     RotationTiled = tf.tile(tf.expand_dims(self.Rotation, 0),
                             (moving.NCELLS, 1, 1))
     TransformedMeans = tf.matmul(RotationTiled,
                                  tf.expand_dims(moving.Means, -1))
     TransformedMeans = tf.squeeze(TransformedMeans, -1) + self.Translation
     TransformedCovars = tf.matmul(
         tf.matmul(RotationTiled, moving.Covariances, transpose_a=True),
         RotationTiled)
     Distances = tf.expand_dims(TransformedMeans, 1) - tf.expand_dims(
         static.Means, 0)
     CSum = tf.expand_dims(TransformedCovars, 1) + tf.expand_dims(
         static.Covariances, 0)
     MCov = tf.reshape(
         tf.tile(tf.expand_dims(moving.Covariances, 1), (1, 2, 1, 1)),
         [-1, 3, 3])
     Distances, CSum = self.n_nearest(Distances, CSum, n_neighbors)
     #Instead of inverse, cholesky decomposition
     #CInv = tf.matrix_inverse(CSum)
     with tf.device('/device:CPU:0'):
         CInv = tf.cholesky_solve(
             CSum,
             tf.tile(tf.expand_dims(tf.eye(3), 0),
                     (tf.shape(CSum)[0], 1, 1)))
     m_ij = tf.expand_dims(Distances, 2)
     l = tf.matmul(tf.matmul(m_ij, CInv, transpose_a=True), m_ij)
     likelihood = tf.exp(-lfd2 * l / 2)
     loss = -lfd1 * tf.reduce_sum(likelihood)
     G, H = gradients(m_ij, CInv, MCov, likelihood, lfd2, self.PARAMS[3:])
     return loss, G, H
Example #19
    def KL(self):
        """
        The KL divergence from the variational distribution to the prior
        :return: KL divergence from N(q_mu, q_sqrt) to N(0, I), independently for each GP
        """
        # if self.white:
        #     return gauss_kl(self.q_mu, self.q_sqrt)
        # else:
        #     return gauss_kl(self.q_mu, self.q_sqrt, self.Ku)

        self.build_cholesky_if_needed()

        KL = -0.5 * self.num_outputs * self.num_inducing
        KL -= 0.5 * tf.reduce_sum(tf.log(tf.matrix_diag_part(self.q_sqrt)**2))

        if not self.white:
            KL += tf.reduce_sum(tf.log(tf.matrix_diag_part(
                self.Lu))) * self.num_outputs
            KL += 0.5 * tf.reduce_sum(
                tf.square(
                    tf.matrix_triangular_solve(
                        self.Lu_tiled, self.q_sqrt, lower=True)))
            Kinv_m = tf.cholesky_solve(self.Lu, self.q_mu)
            KL += 0.5 * tf.reduce_sum(self.q_mu * Kinv_m)
        else:
            KL += 0.5 * tf.reduce_sum(tf.square(self.q_sqrt))
            KL += 0.5 * tf.reduce_sum(self.q_mu**2)

        return KL
Example #20
    def build_backward_variance(self, Yvar):
        """
        Additional method for scaling variance backward (used in :class:`.Normalizer`). Can process both the diagonal
        variances returned by predict_f, as well as full covariance matrices.

        :param Yvar: size N x N x P or size N x P
        :return: Yvar scaled, same rank and size as input
        """
        rank = tf.rank(Yvar)
        # Because TensorFlow evaluates both fn1 and fn2, the transpose can't be in the same line. If a full cov
        # matrix is provided fn1 turns it into a rank 4, then tries to transpose it as a rank 3.
        # Splitting it in two steps however works fine.
        Yvar = tf.cond(tf.equal(rank,
                                2), lambda: tf.matrix_diag(tf.transpose(Yvar)),
                       lambda: Yvar)
        Yvar = tf.cond(tf.equal(rank,
                                2), lambda: tf.transpose(Yvar, perm=[1, 2, 0]),
                       lambda: Yvar)

        N = tf.shape(Yvar)[0]
        D = tf.shape(Yvar)[2]
        L = tf.cholesky(tf.square(tf.transpose(self.A)))
        Yvar = tf.reshape(Yvar, [N * N, D])
        scaled_var = tf.reshape(
            tf.transpose(tf.cholesky_solve(L, tf.transpose(Yvar))), [N, N, D])
        return tf.cond(tf.equal(rank,
                                2), lambda: tf.reduce_sum(scaled_var, axis=1),
                       lambda: scaled_var)
Example #21
 def build_backward(self, Y):
     """
     TensorFlow implementation of the inverse mapping
     """
     L = tf.cholesky(tf.transpose(self.A))
     XT = tf.cholesky_solve(L, tf.transpose(Y-self.b))
     return tf.transpose(XT)
Example #22
    def inference(self, features, outputs, is_train):
        """Build graph for computing predictive mean and variance and negative log probability.

        Args:
            features: input features (dict containing key 'input')
            outputs: targets
            is_train: whether we're training
        Returns:
            negative log marginal likelihood
        """
        inputs = features['input']
        assignments = []
        if is_train:
            # During training, we have to store the training data to compute predictions later on
            assignments.append(self.train_inputs.assign(inputs))
            assignments.append(self.train_outputs.assign(outputs))

        with tf.control_dependencies(
                assignments):  # this ensures that the assigments are executed
            chol, alpha = self._build_interim_vals(inputs, outputs)
        # precision = inv(kxx)
        precision = tf.cholesky_solve(chol, tf.eye(tf.shape(inputs)[-2]))
        precision_diag = tf.matrix_diag_part(precision)

        loo_fmu = outputs - alpha / precision_diag  # GMPL book eq. 5.12
        loo_fs2 = 1.0 / precision_diag  # GMPL book eq. 5.12

        # log probability (lp), also called log pseudo-likelihood
        lp = self._build_loo(outputs, loo_fmu, loo_fs2)

        return {'loss': -lp, 'LP': lp}, []
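
The two lines marked "GMPL book eq. 5.12" follow Rasmussen & Williams' leave-one-out expressions. Assuming `alpha` is K^{-1}y as returned by `_build_interim_vals` (that helper is not shown here), they read

    \mu_i = y_i - \frac{[K^{-1}y]_i}{[K^{-1}]_{ii}},
    \qquad
    \sigma_i^2 = \frac{1}{[K^{-1}]_{ii}},

with [K^{-1}]_{ii} taken from `precision_diag` above.
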
Example #23
    def nlml(self,
             Xu,
             Xf,
             Yu,
             Yf,
             dt,
             hyp1,
             hyp3,
             hyp5,
             sig_n,
             lambda1,
             lambda2,
             un_u,
             un_f,
             kernel_type,
             jitter=1.0e-10):  # negative log marginal likelihood

        Nu = Xu[0].shape[0] + Xu[1].shape[0] + Xu[2].shape[0]

        N = Nu + 3 * Xf.shape[0]
        self.K0 = self.kernel_uf_train(Xu, Xf, hyp1, hyp3, hyp5, self.a,
                                       self.b, self.c, lambda1, lambda2, un_u,
                                       un_f, dt)
        K = self.K0 + (sig_n**2 + jitter) * tf.eye(N, dtype=tf.float64)

        self.L = tf.cholesky(K)
        r = np.concatenate((Yu[0],Yu[1],Yu[2],Yf,Yf,Yf),axis=0)\
          - np.concatenate((np.zeros((Nu,1),dtype=np.float64), self.prior_mean_train[0], self.prior_mean_train[1], self.prior_mean_train[2]),axis=0)
        self.alpha = tf.cholesky_solve(self.L, r)
        self.sig2_tf = tf.matmul(r, self.alpha, transpose_a=True) / N
        return 0.5 * N * tf.log(2.0*np.pi*self.sig2_tf)\
                +tf.reduce_sum(tf.log(tf.diag_part(self.L))) \
                + N/2.0
Example #24
def _expectation(p, mean, none, kern, feat, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <x_n K_{x_n, Z}>_p(x_n)
        - K_{.,.} :: RBF kernel
    :return: NxDxM
    """

    Xmu, Xcov = p.mu, p.cov

    with tf.control_dependencies([tf.assert_equal(
            tf.shape(Xmu)[1], tf.constant(kern.input_dim, settings.tf_int),
            message="Currently cannot handle slicing in exKxz.")]):
        Xmu = tf.identity(Xmu)

    with params_as_tensors_for(kern, feat):
        D = tf.shape(Xmu)[1]
        lengthscales = kern.lengthscales if kern.ARD \
            else tf.zeros((D,), dtype=settings.float_type) + kern.lengthscales

        chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(lengthscales ** 2) + Xcov)  # NxDxD
        all_diffs = tf.transpose(feat.Z) - tf.expand_dims(Xmu, 2)  # NxDxM

        sqrt_det_L = tf.reduce_prod(lengthscales)
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N

        exponent_mahalanobis = tf.cholesky_solve(chol_L_plus_Xcov, all_diffs)  # NxDxM
        non_exponent_term = tf.matmul(Xcov, exponent_mahalanobis, transpose_a=True)
        non_exponent_term = tf.expand_dims(Xmu, 2) + non_exponent_term  # NxDxM

        exponent_mahalanobis = tf.reduce_sum(all_diffs * exponent_mahalanobis, 1)  # NxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxM

        return kern.variance * (determinants[:, None] * exponent_mahalanobis)[:, None, :] * non_exponent_term
Example #25
    def update_W_external(self, X, Y):
        Kdiag = self.kern.Kdiag(X, full_output_cov=False)
        Kux = features.Kuf(self.feature, self.kern, X)
        Kuu = features.Kuu(self.feature, self.kern, jitter=settings.jitter)

        # Copy this into blocks for each dimension
        Kuu = features.Kuu(self.feature, self.kern, jitter=settings.jitter)
        L = tf.cholesky(Kuu)

        sigma2 = self.likelihood.variance

        A = tf.cholesky_solve(L, Kux)  # K x M x N
        mean = tf.matmul(A,
                         tf.transpose(self.q_mu)[:, :, None],
                         transpose_a=True)
        err = (Y - mean)

        reg1 = tf.reduce_sum(
            tf.pow(tf.matmul(A, self.q_sqrt, transpose_a=True), 2), 2)
        reg2 = tf.transpose(Kdiag) - tf.einsum('kmn,kmn->kn', A, Kux)

        logW = -0.5 * tf.log(2 * np.pi * sigma2) \
            - 0.5 * tf.reduce_sum(tf.pow(err, 2), 2) / sigma2 \
            - 0.5 * reg1 / sigma2 - 0.5 * reg2 / sigma2 + tf.log(self.W_prior)[:, None]

        logW = logW - tf.reduce_logsumexp(logW, axis=0, keepdims=True)
        return tf.transpose(logW)
Example #26
def Bound2(phi_0, phi_1, phi_2, sigma_noise, K_mm, mean_y):
    # Preliminary Bound
    beta = 1 / tf.square(sigma_noise)
    bound = 0
    N = h.get_dim(mean_y, 0)
    M = h.get_dim(K_mm, 0)
    W_inv_part = beta * phi_2 + K_mm
    global phi_200
    phi_200 = tf.matrix_solve(W_inv_part, tf.transpose(phi_1))
    W = beta * np.eye(N) - tf.square(beta) * h.Mul(
        phi_1, tf.matrix_solve(W_inv_part, tf.transpose(phi_1)))
    # Computations
    bound += N * tf.log(beta)
    bound += h.log_det(K_mm + 1e-3 * np.eye(M))
    bound -= h.Mul(tf.transpose(mean_y), W, mean_y)
    global matrix_determinant
    matrix_determinant = tf.ones(
        1
    )  #h.log_det(W_inv_part+1e2*np.eye(M))#-1e-40*tf.exp(h.log_det(W_inv_part))

    bound -= h.log_det(W_inv_part +
                       1e-3 * tf.reduce_mean(W_inv_part) * np.eye(M))
    bound -= beta * phi_0
    bound += beta * tf.trace(tf.cholesky_solve(tf.cholesky(K_mm), phi_2))
    bound = bound * 0.5
    return bound
Example #27
    def _kernel(self, X1, X2, jitter=False, debug=False):
        self.inducing_locations = self.context.parameters.get(
            name='inducing_locations_{r}'.format(
                r=self.context.use_latent_f_direction))

        k_g_zz = self.K1.k1.kernel(self.inducing_locations,
                                   self.inducing_locations,
                                   jitter=True)
        k_g_z_x2 = self.K2.k1.kernel(self.inducing_locations, X2)
        k_g_x1_z = self.K2.k1.kernel(X1, self.inducing_locations)

        #k_g_x1_x2 = self.K2.kernel(X1, X2)
        #k_2_x1_x2 = self.K1.kernel(X1, X2)

        K = tf.matmul(k_g_x1_z, tf.cholesky_solve(tf.cholesky(k_g_zz),
                                                  k_g_z_x2))

        if False:
            K = tf.Print(K, [X1], 'X1')
            K = tf.Print(K, [k_g_x1_z], 'k_g_x1_z_latent')
            K = tf.Print(K, [k_g_z_x2], 'k_g_z_x2_latent')
            K = tf.Print(K, [K], 'K_latent', summarize=500)

        #K = k_2_x1_x2

        #K = k_2_x1_x2
        K = self.K1.k1.kernel(X1, X2)

        K = tf.multiply(K, self.K2.k2.kernel(X1, X2, jitter=jitter))

        #K = k_g_x1_x2

        if jitter:
            K = util.add_jitter(K, self.context.jitter)
        return K
Example #28
def _expectation(p, mean, none, kern, feat, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <x_n K_{x_n, Z}>_p(x_n)
        - K_{.,.} :: RBF kernel

    :return: NxDxM
    """
    Xmu, Xcov = p.mu, p.cov

    with tf.control_dependencies([tf.assert_equal(
            tf.shape(Xmu)[1], tf.constant(kern.input_dim, settings.tf_int),
            message="Currently cannot handle slicing in exKxz.")]):
        Xmu = tf.identity(Xmu)

    with params_as_tensors_for(kern), params_as_tensors_for(feat):
        D = tf.shape(Xmu)[1]
        lengthscales = kern.lengthscales if kern.ARD \
            else tf.zeros((D,), dtype=settings.float_type) + kern.lengthscales

        chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(lengthscales ** 2) + Xcov)  # NxDxD
        all_diffs = tf.transpose(feat.Z) - tf.expand_dims(Xmu, 2)  # NxDxM

        sqrt_det_L = tf.reduce_prod(lengthscales)
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N

        exponent_mahalanobis = tf.cholesky_solve(chol_L_plus_Xcov, all_diffs)  # NxDxM
        non_exponent_term = tf.matmul(Xcov, exponent_mahalanobis, transpose_a=True)
        non_exponent_term = tf.expand_dims(Xmu, 2) + non_exponent_term  # NxDxM

        exponent_mahalanobis = tf.reduce_sum(all_diffs * exponent_mahalanobis, 1)  # NxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxM

        return kern.variance * (determinants[:, None] * exponent_mahalanobis)[:, None, :] * non_exponent_term
Example #29
    def propagate(self, mu, Sigma):
        """
        Implementation of the function propagate
        required by GDSM. (see template.py)
        """

        do_batch = mu.get_shape().ndims != 2
        batch_shape = tf.shape(mu)[:-2]

        l, L, H = self._propagation_terms(mu, Sigma)

        # Compute predicted mean
        l_rank2 = l if not do_batch else tf.reshape(l, [-1, self._n])
        m = tf.matmul(l_rank2, self._beta)
        if do_batch:
            m = tf.reshape(m, tf.concat([batch_shape, [self.output_dim, 1]],
                                        0))
        else:
            m = tf.transpose(m)

        L_rank2 = L if not do_batch else tf.reshape(L, [-1, self._n])

        # Compute predicted output variance
        temp = tf.matmul(L_rank2, self._beta)
        if do_batch:
            temp = tf.reshape(temp, [-1, self._n, self.output_dim])
            temp = tf.matrix_transpose(temp)
            temp = tf.reshape(temp, [-1, self._n])
            temp = tf.matmul(temp, self._beta)
            temp = tf.reshape(
                temp,
                tf.concat([batch_shape, [self.output_dim, self.output_dim]],
                          0))
            L_rank2 = tf.transpose(L_rank2)
        else:
            temp = tf.matmul(temp, self._beta, transpose_a=True)

        C = temp - tf.matmul(m, m, transpose_b=True)

        temp = tf.cholesky_solve(self._Kchol, L_rank2)
        if do_batch:
            temp = tf.reshape(
                tf.transpose(temp),
                tf.concat([batch_shape, [1, 1, self._n, self._n]], 0))

        C += self._Iout * \
            (self._sigma2 + self._sigma_noise2 - tf.trace(temp))

        # Compute input/output covariance
        C_oi = tf.matmul(l * tf.transpose(self._beta),
                         H - mu,
                         transpose_b=True)

        if self._id_mean:
            m += mu
            C += Sigma + C_oi + tf.matrix_transpose(C_oi)
            C_oi += Sigma

        return m, C, C_oi
Example #30
def _expectation(p, rbf_kern, feat1, lin_kern, feat2, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <Ka_{Z1, x_n} Kb_{x_n, Z2}>_p(x_n)
        - K_rbf_{.,.} :: RBF kernel
        - K_lin_{.,.} :: Linear kernel
    Different Z1 and Z2 are handled if p is diagonal and K_lin and K_rbf have disjoint
    active_dims, in which case the joint expectations simplify into a product of expectations

    :return: NxM1xM2
    """
    if rbf_kern.on_separate_dims(lin_kern) and isinstance(p, DiagonalGaussian):  # no joint expectations required
        eKxz1 = expectation(p, (rbf_kern, feat1))
        eKxz2 = expectation(p, (lin_kern, feat2))
        return eKxz1[:, :, None] * eKxz2[:, None, :]

    if feat1 != feat2:
        raise NotImplementedError("Features have to be the same for both kernels.")

    if rbf_kern.active_dims != lin_kern.active_dims:
        raise NotImplementedError("active_dims have to be the same for both kernels.")

    with params_as_tensors_for(rbf_kern), params_as_tensors_for(lin_kern), \
         params_as_tensors_for(feat1), params_as_tensors_for(feat2):
        # use only active dimensions
        Xcov = rbf_kern._slice_cov(tf.matrix_diag(p.cov) if isinstance(p, DiagonalGaussian) else p.cov)
        Z, Xmu = rbf_kern._slice(feat1.Z, p.mu)

        N = tf.shape(Xmu)[0]
        D = tf.shape(Xmu)[1]

        lin_kern_variances = lin_kern.variance if lin_kern.ARD \
            else tf.zeros((D,), dtype=settings.tf_float) + lin_kern.variance

        rbf_kern_lengthscales = rbf_kern.lengthscales if rbf_kern.ARD \
            else tf.zeros((D,), dtype=settings.tf_float) + rbf_kern.lengthscales  ## Begin RBF eKxz code:

        chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(rbf_kern_lengthscales ** 2) + Xcov)  # NxDxD

        Z_transpose = tf.transpose(Z)
        all_diffs = Z_transpose - tf.expand_dims(Xmu, 2)  # NxDxM
        exponent_mahalanobis = tf.matrix_triangular_solve(chol_L_plus_Xcov, all_diffs, lower=True)  # NxDxM
        exponent_mahalanobis = tf.reduce_sum(tf.square(exponent_mahalanobis), 1)  # NxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxM

        sqrt_det_L = tf.reduce_prod(rbf_kern_lengthscales)
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N
        eKxz_rbf = rbf_kern.variance * (determinants[:, None] * exponent_mahalanobis)  ## NxM <- End RBF eKxz code

        tiled_Z = tf.tile(tf.expand_dims(Z_transpose, 0), (N, 1, 1))  # NxDxM
        z_L_inv_Xcov = tf.matmul(tiled_Z, Xcov / rbf_kern_lengthscales[:, None] ** 2., transpose_a=True)  # NxMxD

        cross_eKzxKxz = tf.cholesky_solve(
            chol_L_plus_Xcov, (lin_kern_variances * rbf_kern_lengthscales ** 2.)[..., None] * tiled_Z)  # NxDxM

        cross_eKzxKxz = tf.matmul((z_L_inv_Xcov + Xmu[:, None, :]) * eKxz_rbf[..., None], cross_eKzxKxz)  # NxMxM
        return cross_eKzxKxz
Example #31
def _expectation(p, rbf_kern, feat1, lin_kern, feat2, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <Ka_{Z1, x_n} Kb_{x_n, Z2}>_p(x_n)
        - K_rbf_{.,.} :: RBF kernel
        - K_lin_{.,.} :: Linear kernel
    Different Z1 and Z2 are handled if p is diagonal and K_lin and K_rbf have disjoint
    active_dims, in which case the joint expectations simplify into a product of expectations

    :return: NxM1xM2
    """
    if rbf_kern.on_separate_dims(lin_kern) and isinstance(p, DiagonalGaussian):  # no joint expectations required
        eKxz1 = expectation(p, (rbf_kern, feat1))
        eKxz2 = expectation(p, (lin_kern, feat2))
        return eKxz1[:, :, None] * eKxz2[:, None, :]

    if feat1 != feat2:
        raise NotImplementedError("Features have to be the same for both kernels.")

    if rbf_kern.active_dims != lin_kern.active_dims:
        raise NotImplementedError("active_dims have to be the same for both kernels.")

    with params_as_tensors_for(rbf_kern), params_as_tensors_for(lin_kern), \
         params_as_tensors_for(feat1), params_as_tensors_for(feat2):
        # use only active dimensions
        Xcov = rbf_kern._slice_cov(tf.matrix_diag(p.cov) if isinstance(p, DiagonalGaussian) else p.cov)
        Z, Xmu = rbf_kern._slice(feat1.Z, p.mu)

        N = tf.shape(Xmu)[0]
        D = tf.shape(Xmu)[1]

        lin_kern_variances = lin_kern.variance if lin_kern.ARD \
            else tf.zeros((D,), dtype=settings.tf_float) + lin_kern.variance

        rbf_kern_lengthscales = rbf_kern.lengthscales if rbf_kern.ARD \
            else tf.zeros((D,), dtype=settings.tf_float) + rbf_kern.lengthscales  ## Begin RBF eKxz code:

        chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(rbf_kern_lengthscales ** 2) + Xcov)  # NxDxD

        Z_transpose = tf.transpose(Z)
        all_diffs = Z_transpose - tf.expand_dims(Xmu, 2)  # NxDxM
        exponent_mahalanobis = tf.matrix_triangular_solve(chol_L_plus_Xcov, all_diffs, lower=True)  # NxDxM
        exponent_mahalanobis = tf.reduce_sum(tf.square(exponent_mahalanobis), 1)  # NxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxM

        sqrt_det_L = tf.reduce_prod(rbf_kern_lengthscales)
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N
        eKxz_rbf = rbf_kern.variance * (determinants[:, None] * exponent_mahalanobis)  ## NxM <- End RBF eKxz code

        tiled_Z = tf.tile(tf.expand_dims(Z_transpose, 0), (N, 1, 1))  # NxDxM
        z_L_inv_Xcov = tf.matmul(tiled_Z, Xcov / rbf_kern_lengthscales[:, None] ** 2., transpose_a=True)  # NxMxD

        cross_eKzxKxz = tf.cholesky_solve(
            chol_L_plus_Xcov, (lin_kern_variances * rbf_kern_lengthscales ** 2.)[..., None] * tiled_Z)  # NxDxM

        cross_eKzxKxz = tf.matmul((z_L_inv_Xcov + Xmu[:, None, :]) * eKxz_rbf[..., None], cross_eKzxKxz)  # NxMxM
        return cross_eKzxKxz
Example #32
    def _build_cross_entropy_sum(self, k1, m1, s1, n, debug=False):
        k_chol = tf.cholesky(k1)
        m1 = tf.expand_dims(m1, 1)

        d = tf.trace(tf.cholesky_solve(k_chol, s1))
        p = util.log_normal_chol(x=0.0, mu=m1, chol=k_chol, n=n)

        result = p - 0.5 * d
        return result
Example #33
def predict2():
    # predictions
    cov=h.Mul(K_mm_2,tf.matrix_inverse(K_mm_2+K_mnnm_2/tf.square(sigma_2)),K_mm_2)
    cov_chol=tf.cholesky(cov)
    mu=h.Mul(K_mm_2,tf.cholesky_solve(cov_chol,K_mn_2),Ytr)/tf.square(sigma_2)
    mean=h.Mul(K_nm_2,tf.matrix_solve(K_mm_1,mu))
    variance=K_nn_2-h.Mul(K_nm_2,h.safe_chol(K_mm_2,tf.transpose(K_nm_2)))
    var_terms=2*tf.sqrt(tf.reshape(tf.diag_part(variance)+tf.square(sigma_2),[N,1]))
    return mean, var_terms
Example #34
 def _cho():
     # batch_size, n, n
     L = tf.cholesky(Kf,name='L')
     # batch_size, n,1
     alpha = tf.cholesky_solve(L, dy, name='alpha')
     data_fit = 0.5 * tf.reduce_sum(dy*alpha,axis=-1)[...,0]
     complexity = tf.trace(tf.log(L))
     scale = 0.5*n*np.log(2.*np.pi)
     return data_fit + complexity + scale
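
Written out (this restatement is not part of the quoted snippet), `_cho` returns the per-batch negative log marginal likelihood of dy under a zero-mean Gaussian with covariance Kf = L L^T:

    -\log p(\mathrm{dy}) = \tfrac{1}{2}\,\mathrm{dy}^\top K_f^{-1}\,\mathrm{dy}
        + \sum_i \log L_{ii} + \tfrac{n}{2}\log 2\pi,

where `alpha = tf.cholesky_solve(L, dy)` supplies K_f^{-1} dy and `tf.trace(tf.log(L))` picks out the diagonal sum, i.e. one half of log det Kf.
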
Example #35
 def test_works_with_five_different_random_pos_def_matricies(self):
   with self.test_session():
     for n in range(1, 6):
       for np_type in [np.float32, np.float64]:
         matrix = _random_pd_matrix(n, self.rng).astype(np_type)
         chol = tf.cholesky(matrix)
         for k in range(1, 3):
           rhs = self.rng.randn(n, k).astype(np_type)
           x = tf.cholesky_solve(chol, rhs)
           self.assertAllClose(rhs, tf.matmul(matrix, x).eval(), atol=1e-4)
Example #36
def Bound1(y,S,Kmm,Knm,Tr_Knn,sigma):
#matrices to be used
    Kmm_chol=tf.cholesky(Kmm)
    sig_2=tf.square(sigma)
    N=h.get_dim(y,0)
    Q_nn=h.Mul(Knm,tf.cholesky_solve(Kmm_chol,tf.transpose(Knm)))
    Q_I_chol=tf.cholesky(sig_2*np.eye(N)+Q_nn)
    bound=-0.5*(Tr_Knn-Q_nn)/sig_2
    bound+=h.multivariate_normal(y, tf.zeros([N,1],dtype=tf.float32), Q_I_chol)
    bound-=0.5*tf.reduce_sum(S)/sig_2+0.1*0.5*tf.reduce_sum(tf.log(S))
    return bound
Example #37
  def testDiffusionBehavesCorrectly(self):
    """Test that for the SGLD finds minimum of the 3D Gaussian energy."""
    with self.test_session(graph=tf.Graph()) as sess:
      # Set up random seed for the optimizer
      tf.set_random_seed(42)
      dtype = np.float32
      true_mean = dtype([0, 0, 0])
      true_cov = dtype([[1, 0.25, 0.25], [0.25, 1, 0.25], [0.25, 0.25, 1]])
      # Loss is defined through the Cholesky decomposition
      chol = tf.linalg.cholesky(true_cov)
      var_1 = tf.get_variable(
          'var_1', initializer=[1., 1.])
      var_2 = tf.get_variable(
          'var_2', initializer=[1.])

      var = tf.concat([var_1, var_2], axis=-1)
      # Partially defined loss function
      loss_part = tf.cholesky_solve(chol, tf.expand_dims(var, -1))
      # Loss function
      loss = 0.5 * tf.squeeze(tf.matmul(loss_part, tf.expand_dims(var, -1),
                                        transpose_a=True))

      # Set up the learning rate with a polynomial decay
      global_step = tf.Variable(0, trainable=False)
      starter_learning_rate = .3
      end_learning_rate = 1e-4
      decay_steps = 1e4
      learning_rate = tf.train.polynomial_decay(starter_learning_rate,
                                                global_step, decay_steps,
                                                end_learning_rate, power=1.)

      # Set up the optimizer
      optimizer_kernel = tfp.optimizer.StochasticGradientLangevinDynamics(
          learning_rate=learning_rate, preconditioner_decay_rate=0.99)

      optimizer = optimizer_kernel.minimize(loss)

      init = tf.global_variables_initializer()
      # Number of training steps
      training_steps = 5000
      # Record the steps as and treat them as samples
      samples = [np.zeros([training_steps, 2]), np.zeros([training_steps, 1])]
      sess.run(init)
      for step in range(training_steps):
        sess.run([optimizer, loss])
        sample = [sess.run(var_1), sess.run(var_2)]
        samples[0][step, :] = sample[0]
        samples[1][step, :] = sample[1]

    samples_ = np.concatenate(samples, axis=-1)
    sample_mean = np.mean(samples_, 0)
    self.assertAllClose(sample_mean, true_mean, atol=0.1, rtol=0.1)
Example #38
 def test_works_with_five_different_random_pos_def_matrices(self):
   with self.test_session():
     for n in range(1, 6):
       for np_type, atol in [(np.float32, 0.05), (np.float64, 1e-5)]:
         # Create 2 x n x n matrix
         array = np.array(
             [_random_pd_matrix(n, self.rng), _random_pd_matrix(n, self.rng)]
         ).astype(np_type)
         chol = tf.cholesky(array)
         for k in range(1, 3):
           rhs = self.rng.randn(2, n, k).astype(np_type)
           x = tf.cholesky_solve(chol, rhs)
           self.assertAllClose(rhs, tf.matmul(array, x).eval(), atol=atol)
Example #39
def Bound2(phi_0,phi_1,phi_2,sigma_noise,K_mm,mean_y):
    # Preliminary Bound
    beta=1/tf.square(sigma_noise)
    bound=0
    N=h.get_dim(mean_y,0)
    M=h.get_dim(K_mm,0)
    W_inv_part=beta*phi_2+K_mm
    global phi_200
    phi_200=tf.matrix_solve(W_inv_part,tf.transpose(phi_1))
    W=beta*np.eye(N)-tf.square(beta)*h.Mul(phi_1,tf.matrix_solve(W_inv_part,tf.transpose(phi_1)))
    # Computations
    bound+=N*tf.log(beta)
    bound+=h.log_det(K_mm+1e-3*np.eye(M))
    bound-=h.Mul(tf.transpose(mean_y),W,mean_y)
    global matrix_determinant
    matrix_determinant=tf.ones(1) #h.log_det(W_inv_part+1e2*np.eye(M))#-1e-40*tf.exp(h.log_det(W_inv_part))


    bound-=h.log_det(W_inv_part+1e-3*tf.reduce_mean(W_inv_part)*np.eye(M))
    bound-=beta*phi_0
    bound+=beta*tf.trace(tf.cholesky_solve(tf.cholesky(K_mm),phi_2))
    bound=bound*0.5
    return bound
Example #40
def safe_chol(A,RHS):
    conditioned=condition((A+tf.transpose(A))/2)
    chol=tf.cholesky(conditioned)
    return tf.cholesky_solve(chol,RHS)
Example #41
  def build_model(self):
    """Defines the GP model.

    The loss is computed for partial feedback settings (bandits), so only
    the observed outcome is backpropagated (see weighted loss).
    Selects the optimizer and, finally, it also initializes the graph.
    """

    logging.info("Initializing model %s.", self.name)
    self.global_step = tf.train.get_or_create_global_step()

    # Define state for the model (inputs, etc.)
    self.x_train = tf.get_variable(
        "training_data",
        initializer=tf.ones(
            [self.hparams.batch_size, self.n_in], dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.y_train = tf.get_variable(
        "training_labels",
        initializer=tf.zeros([self.hparams.batch_size, 1], dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.weights_train = tf.get_variable(
        "weights_train",
        initializer=tf.ones(
            [self.hparams.batch_size, self.n_out], dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.input_op = tf.assign(self.x_train, self.x_in, validate_shape=False)
    self.input_w_op = tf.assign(
        self.weights_train, self.weights, validate_shape=False)

    self.input_std = tf.get_variable(
        "data_standard_deviation",
        initializer=tf.ones([1, self.n_out], dtype=tf.float64),
        dtype=tf.float64,
        trainable=False)
    self.input_mean = tf.get_variable(
        "data_mean",
        initializer=tf.zeros([1, self.n_out], dtype=tf.float64),
        dtype=tf.float64,
        trainable=True)

    # GP Hyperparameters
    self.noise = tf.get_variable(
        "noise", initializer=tf.cast(0.0, dtype=tf.float64))
    self.amplitude = tf.get_variable(
        "amplitude", initializer=tf.cast(1.0, dtype=tf.float64))
    self.amplitude_linear = tf.get_variable(
        "linear_amplitude", initializer=tf.cast(1.0, dtype=tf.float64))
    self.length_scales = tf.get_variable(
        "length_scales", initializer=tf.zeros([1, self.n_in], dtype=tf.float64))
    self.length_scales_lin = tf.get_variable(
        "length_scales_linear",
        initializer=tf.zeros([1, self.n_in], dtype=tf.float64))

    # Latent embeddings of the different outputs for task covariance
    self.task_vectors = tf.get_variable(
        "latent_task_vectors",
        initializer=tf.random_normal(
            [self.n_out, self.task_latent_dim], dtype=tf.float64))

    # Normalize outputs across each dimension
    # Since we have different numbers of observations across each task, we
    # normalize by their respective counts.
    index_counts = self.atleast_2d(tf.reduce_sum(self.weights, axis=0),
                                   self.n_out)
    index_counts = tf.where(index_counts > 0, index_counts,
                            tf.ones(tf.shape(index_counts), dtype=tf.float64))
    self.mean_op = tf.assign(self.input_mean,
                             tf.reduce_sum(self.y, axis=0) / index_counts)
    self.var_op = tf.assign(
        self.input_std, tf.sqrt(1e-4 + tf.reduce_sum(tf.square(
            self.y - tf.reduce_sum(self.y, axis=0) / index_counts), axis=0)
                                / index_counts))

    with tf.control_dependencies([self.var_op]):
      y_normed = self.atleast_2d(
          (self.y - self.input_mean) / self.input_std, self.n_out)
      y_normed = self.atleast_2d(tf.boolean_mask(y_normed, self.weights > 0), 1)
    self.out_op = tf.assign(self.y_train, y_normed, validate_shape=False)

    # Observation noise
    alpha = tf.nn.softplus(self.noise) + 1e-6

    # Covariance
    with tf.control_dependencies([self.input_op, self.input_w_op, self.out_op]):
      self.self_cov = (self.cov(self.x_in, self.x_in) *
                       self.task_cov(self.weights, self.weights) +
                       tf.eye(tf.shape(self.x_in)[0], dtype=tf.float64) * alpha)

    self.chol = tf.cholesky(self.self_cov)
    self.kinv = tf.cholesky_solve(self.chol, tf.eye(tf.shape(self.x_in)[0],
                                                    dtype=tf.float64))

    self.input_inv = tf.Variable(
        tf.eye(self.hparams.batch_size, dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.input_cov_op = tf.assign(self.input_inv, self.kinv,
                                  validate_shape=False)

    # Log determinant from the diagonal entries of the Cholesky factor
    # self.chol
    with tf.control_dependencies([self.input_cov_op]):
      logdet = 2.0 * tf.reduce_sum(tf.log(tf.diag_part(self.chol) + 1e-16))

    # Log Marginal likelihood
    self.marginal_ll = -tf.reduce_sum(-0.5 * tf.matmul(
        tf.transpose(y_normed), tf.matmul(self.kinv, y_normed)) - 0.5 * logdet -
                                      0.5 * self.n * np.log(2 * np.pi))

    zero = tf.cast(0., dtype=tf.float64)
    one = tf.cast(1., dtype=tf.float64)
    standard_normal = tfd.Normal(loc=zero, scale=one)

    # Loss is marginal likelihood and priors
    self.loss = tf.reduce_sum(
        self.marginal_ll -
        (standard_normal.log_prob(self.amplitude) +
         standard_normal.log_prob(tf.exp(self.noise)) +
         standard_normal.log_prob(self.amplitude_linear) +
         tfd.Normal(loc=zero, scale=one * 10.).log_prob(
             self.task_vectors))
    )

    # Optimizer for hyperparameters
    optimizer = tf.train.AdamOptimizer(learning_rate=self.hparams.lr)
    vars_to_optimize = [
        self.amplitude, self.length_scales, self.length_scales_lin,
        self.amplitude_linear, self.noise, self.input_mean
    ]

    if self.learn_embeddings:
      vars_to_optimize.append(self.task_vectors)
    grads = optimizer.compute_gradients(self.loss, vars_to_optimize)
    self.train_op = optimizer.apply_gradients(grads,
                                              global_step=self.global_step)

    # Predictions for test data
    self.y_mean, self.y_pred = self.posterior_mean_and_sample(self.x)

    # create tensorboard metrics
    self.create_summaries()
    self.summary_writer = tf.summary.FileWriter("{}/graph_{}".format(
        FLAGS.logdir, self.name), self.sess.graph)
    self.check = tf.add_check_numerics_ops()
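
For reference (a standard GP result, not quoted from the bandits code), the quantity assembled from `self.kinv` and `logdet` is the log marginal likelihood

    \log p(y \mid X) = -\tfrac{1}{2}\, y^\top K^{-1} y - \tfrac{1}{2}\log\lvert K\rvert - \tfrac{n}{2}\log 2\pi,
    \qquad \log\lvert K\rvert = 2\sum_i \log(\mathrm{chol})_{ii}.

Note that `self.marginal_ll` carries a leading minus sign, so despite its name it is the negative log marginal likelihood that enters `self.loss` and gets minimized.
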
Example #42
def solve_linear(A, L, w_x, w_y):
    rhs = w_x - tf.matmul(A, w_y, transpose_a=True)
    z_x = tf.cholesky_solve(L, rhs)
    z_y = w_y + tf.matmul(A, z_x)
    return z_x, z_y
Example #43
def safe_chol(A,RHS):
    chol=tf.cholesky(condition(A))

    return tf.cholesky_solve(chol,RHS)