Example #1
File: gplvm.py Project: blutooth/dgp
    def build_predict(self, Xnew, full_cov=False):
        """
        Compute the mean and variance of the latent function at some new points
        Xnew. Note that this is very similar to the SGPR prediction, for which
        there are notes in the SGPR notebook.
        """
        num_inducing = tf.shape(self.Z)[0]
        psi0, psi1, psi2 = ke.build_psi_stats(self.Z, self.kern, self.X_mean, self.X_var)
        Kuu = self.kern.K(self.Z) + eye(num_inducing) * 1e-6
        Kus = self.kern.K(self.Z, Xnew)
        sigma2 = self.likelihood.variance
        sigma = tf.sqrt(sigma2)
        L = tf.cholesky(Kuu)

        A = tf.matrix_triangular_solve(L, tf.transpose(psi1), lower=True) / sigma
        tmp = tf.matrix_triangular_solve(L, psi2, lower=True)
        AAT = tf.matrix_triangular_solve(L, tf.transpose(tmp), lower=True) / sigma2
        B = AAT + eye(num_inducing)
        LB = tf.cholesky(B)
        c = tf.matrix_triangular_solve(LB, tf.matmul(A, self.Y), lower=True) / sigma
        tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
        tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
        mean = tf.matmul(tf.transpose(tmp2), c)
        if full_cov:
            var = self.kern.K(Xnew) + tf.matmul(tf.transpose(tmp2), tmp2)\
                - tf.matmul(tf.transpose(tmp1), tmp1)
            shape = tf.pack([1, 1, tf.shape(self.Y)[1]])
            var = tf.tile(tf.expand_dims(var, 2), shape)
        else:
            var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0)\
                - tf.reduce_sum(tf.square(tmp1), 0)
            shape = tf.pack([1, tf.shape(self.Y)[1]])
            var = tf.tile(tf.expand_dims(var, 1), shape)
        return mean + self.mean_function(Xnew), var
    def build_predict(self, Xnew, full_cov=False):
        err = self.Y
        Kuf = self.RBF(self.Z, self.X)
        Kuu = self.RBF(self.Z, self.Z) + eye(self.num_inducing) * 1e-6
        Kus = self.RBF(self.Z, Xnew)
        sigma = tf.sqrt(self.likelihood_variance)
        L = tf.cholesky(Kuu)
        A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
        B = tf.matmul(A, tf.transpose(A)) + eye(self.num_inducing)
        LB = tf.cholesky(B)
        Aerr = tf.matmul(A, err)
        c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma
        tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
        tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
        mean = tf.matmul(tf.transpose(tmp2), c)
        
        if full_cov:

            var = self.RBF(Xnew, Xnew) + tf.matmul(tf.transpose(tmp2), tmp2)\
                - tf.matmul(tf.transpose(tmp1), tmp1)
            shape = tf.pack([1, 1, tf.shape(self.Y)[1]])
            var = tf.tile(tf.expand_dims(var, 2), shape)

        else:

            var = tf.diag_part(self.RBF(Xnew, Xnew)) + tf.reduce_sum(tf.square(tmp2), 0)\
                - tf.reduce_sum(tf.square(tmp1), 0)  # diagonal only for the marginal-variance case
            shape = tf.pack([1, tf.shape(self.Y)[1]])
            var = tf.tile(tf.expand_dims(var, 1), shape)


        return mean, var
Example #3
File: sgpr.py Project: agarbuno/GPflow
    def build_likelihood(self):
        """
        Construct a tensorflow function to compute the bound on the marginal
        likelihood. For a derivation of the terms in here, see the associated
        SGPR notebook. 
        """

        num_inducing = tf.shape(self.Z)[0]
        num_data = tf.shape(self.Y)[0]
        output_dim = tf.shape(self.Y)[1]

        err =  self.Y - self.mean_function(self.X)
        Kdiag = self.kern.Kdiag(self.X)
        Kuf = self.kern.K(self.Z, self.X)
        Kuu = self.kern.K(self.Z) + eye(num_inducing) * 1e-6
        L = tf.cholesky(Kuu)

        # Compute intermediate matrices
        A = tf.matrix_triangular_solve(L, Kuf, lower=True)*tf.sqrt(1./self.likelihood.variance)
        AAT = tf.matmul(A, tf.transpose(A))
        B = AAT + eye(num_inducing)
        LB = tf.cholesky(B)
        c = tf.matrix_triangular_solve(LB, tf.matmul(A, err), lower=True) * tf.sqrt(1./self.likelihood.variance)

        #compute log marginal bound
        bound = -0.5*tf.cast(num_data*output_dim, tf.float64)*np.log(2*np.pi)
        bound += -tf.cast(output_dim, tf.float64)*tf.reduce_sum(tf.log(tf.user_ops.get_diag(LB)))
        bound += -0.5*tf.cast(num_data*output_dim, tf.float64)*tf.log(self.likelihood.variance)
        bound += -0.5*tf.reduce_sum(tf.square(err))/self.likelihood.variance
        bound += 0.5*tf.reduce_sum(tf.square(c))
        bound += -0.5*(tf.reduce_sum(Kdiag)/self.likelihood.variance - tf.reduce_sum(tf.user_ops.get_diag(AAT)))

        return bound
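For reference, the bound assembled above is the Titsias (2009) collapsed evidence lower bound; written out (a reconstruction for orientation, not part of the quoted source), with $Q_{ff} = K_{fu} K_{uu}^{-1} K_{uf}$ and $\sigma^2$ the likelihood variance:

$$
\mathcal{L} \;=\; \log \mathcal{N}\!\left(Y \mid m(X),\, Q_{ff} + \sigma^2 I\right) \;-\; \frac{1}{2\sigma^2}\,\mathrm{tr}\!\left(K_{ff} - Q_{ff}\right),
$$

evaluated through the Cholesky factors $L$ of $K_{uu}$ and $L_B$ of $B = I + \sigma^{-2} L^{-1} K_{uf} K_{fu} L^{-\top}$, which is where the `tf.cholesky` calls enter.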
Example #4
 def testNonSquareMatrix(self):
   with self.assertRaises(ValueError):
     tf.cholesky(np.array([[1., 2., 3.], [3., 4., 5.]]))
   with self.assertRaises(ValueError):
     tf.cholesky(
         np.array([[[1., 2., 3.], [3., 4., 5.]], [[1., 2., 3.], [3., 4., 5.]]
                  ]))
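As a counterpart to the failure cases above, here is a minimal sketch (not from the original test file) of `tf.cholesky` applied to a matrix that actually is symmetric positive definite, using the TF 1.x graph API that the surrounding examples assume:

import numpy as np
import tensorflow as tf

# Build a symmetric positive-definite matrix A A^T + jitter * I.
rng = np.random.RandomState(0)
A = rng.randn(4, 4)
spd = A.dot(A.T) + 1e-6 * np.eye(4)

L = tf.cholesky(tf.constant(spd))            # lower-triangular factor
recon = tf.matmul(L, L, transpose_b=True)    # should reproduce spd

with tf.Session() as sess:
    L_val, recon_val = sess.run([L, recon])
    print(np.allclose(recon_val, spd))       # True up to round-off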
Example #5
File: sgpr.py Project: gbohner/GPflow
 def build_predict(self, Xnew, full_cov=False):
     """
     Compute the mean and variance of the latent function at some new points
     Xnew. For a derivation of the terms in here, see the associated SGPR
     notebook.
     """
     num_inducing = tf.shape(self.Z)[0]
     err = self.Y - self.mean_function(self.X)
     Kuf = self.kern.K(self.Z, self.X)
     Kuu = self.kern.K(self.Z) + eye(num_inducing) * 1e-6
     Kus = self.kern.K(self.Z, Xnew)
     sigma = tf.sqrt(self.likelihood.variance)
     L = tf.cholesky(Kuu)
     A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
     B = tf.matmul(A, tf.transpose(A)) + eye(num_inducing)
     LB = tf.cholesky(B)
     Aerr = tf.matmul(A, err)
     c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma
     tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
     tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
     mean = tf.matmul(tf.transpose(tmp2), c)
     if full_cov:
         var = self.kern.K(Xnew) + tf.matmul(tf.transpose(tmp2), tmp2)\
             - tf.matmul(tf.transpose(tmp1), tmp1)
         shape = tf.pack([1, 1, tf.shape(self.Y)[1]])
         var = tf.tile(tf.expand_dims(var, 2), shape)
     else:
         var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0)\
             - tf.reduce_sum(tf.square(tmp1), 0)
         shape = tf.pack([1, tf.shape(self.Y)[1]])
         var = tf.tile(tf.expand_dims(var, 1), shape)
     return mean + self.mean_function(Xnew), var
Example #6
 def _build_predict(self, Xnew, full_cov=False):
     """
     Compute the mean and variance of the latent function at some new points
     Xnew. For a derivation of the terms in here, see the associated SGPR
     notebook.
     """
     num_inducing = len(self.feature)
     err = self.Y - self.mean_function(self.X)
     Kuf = self.feature.Kuf(self.kern, self.X)
     Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
     Kus = self.feature.Kuf(self.kern, Xnew)
     sigma = tf.sqrt(self.likelihood.variance)
     L = tf.cholesky(Kuu)
     A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
     B = tf.matmul(A, A, transpose_b=True) + tf.eye(num_inducing, dtype=settings.float_type)
     LB = tf.cholesky(B)
     Aerr = tf.matmul(A, err)
     c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma
     tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
     tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
     mean = tf.matmul(tmp2, c, transpose_a=True)
     if full_cov:
         var = self.kern.K(Xnew) + tf.matmul(tmp2, tmp2, transpose_a=True) \
               - tf.matmul(tmp1, tmp1, transpose_a=True)
         shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
         var = tf.tile(tf.expand_dims(var, 2), shape)
     else:
         var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
               - tf.reduce_sum(tf.square(tmp1), 0)
         shape = tf.stack([1, tf.shape(self.Y)[1]])
         var = tf.tile(tf.expand_dims(var, 1), shape)
     return mean + self.mean_function(Xnew), var
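Reading the algebra off the code above (a reconstruction; $\sigma^2$ is the likelihood variance, $LL^\top = K_{uu}$, $L_B L_B^\top = B = I + \sigma^{-2} L^{-1} K_{uf} K_{fu} L^{-\top}$, and `tmp1` $= L^{-1}K_{u*}$, `tmp2` $= L_B^{-1}\,$`tmp1`), the SGPR predictive moments are:

$$
\begin{aligned}
\mu_* &= \sigma^{-2}\, K_{*u}\, L^{-\top} B^{-1} L^{-1} K_{uf}\,\big(Y - m(X)\big),\\
\Sigma_* &= K_{**} \;-\; K_{*u} K_{uu}^{-1} K_{u*} \;+\; K_{*u}\, L^{-\top} B^{-1} L^{-1}\, K_{u*},
\end{aligned}
$$

with the second and third terms of $\Sigma_*$ supplied by `tmp1` and `tmp2` respectively; the same structure appears in Examples #1 and #5.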
Example #7
    def compute_upper_bound(self):
        num_data = tf.cast(tf.shape(self.Y)[0], settings.float_type)

        Kdiag = self.kern.Kdiag(self.X)
        Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
        Kuf = self.feature.Kuf(self.kern, self.X)

        L = tf.cholesky(Kuu)
        LB = tf.cholesky(Kuu + self.likelihood.variance ** -1.0 * tf.matmul(Kuf, Kuf, transpose_b=True))

        LinvKuf = tf.matrix_triangular_solve(L, Kuf, lower=True)
        # Using the Trace bound, from Titsias' presentation
        c = tf.reduce_sum(Kdiag) - tf.reduce_sum(LinvKuf ** 2.0)
        # Kff = self.kern.K(self.X)
        # Qff = tf.matmul(Kuf, LinvKuf, transpose_a=True)

        # Alternative bound on max eigenval:
        # c = tf.reduce_max(tf.reduce_sum(tf.abs(Kff - Qff), 0))
        corrected_noise = self.likelihood.variance + c

        const = -0.5 * num_data * tf.log(2 * np.pi * self.likelihood.variance)
        logdet = tf.reduce_sum(tf.log(tf.diag_part(L))) - tf.reduce_sum(tf.log(tf.diag_part(LB)))

        LC = tf.cholesky(Kuu + corrected_noise ** -1.0 * tf.matmul(Kuf, Kuf, transpose_b=True))
        v = tf.matrix_triangular_solve(LC, corrected_noise ** -1.0 * tf.matmul(Kuf, self.Y), lower=True)
        quad = -0.5 * corrected_noise ** -1.0 * tf.reduce_sum(self.Y ** 2.0) + 0.5 * tf.reduce_sum(v ** 2.0)

        return const + logdet + quad
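Spelled out, the three pieces returned above are (a direct transcription of the code, writing $\tilde\sigma^2 = \sigma^2 + c$ for the trace-corrected noise and $N$ for the number of data points):

$$
\text{const} = -\tfrac{N}{2}\log\!\big(2\pi\sigma^2\big),\qquad
\text{logdet} = \tfrac12\log\frac{|K_{uu}|}{\,|K_{uu} + \sigma^{-2} K_{uf}K_{fu}|\,},
$$
$$
\text{quad} = -\tfrac{1}{2\tilde\sigma^2}\,Y^\top Y \;+\; \tfrac12\,\lVert v\rVert^2,
\qquad v = L_C^{-1}\,\tilde\sigma^{-2} K_{uf} Y,\quad L_C L_C^\top = K_{uu} + \tilde\sigma^{-2} K_{uf}K_{fu}.
$$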
Example #8
File: maxKL.py Project: blutooth/dgp
def gauss_kl(min_q_mu, q_sq,K):
    q_mu=-1*min_q_mu

    #q_sqrt=tf.cholesky(tf.squeeze(q_sqrt))
        # K is a variance...we sqrt later
    '''
    N=1
    Q=5
    q_mu=tf.random_normal([Q,1],dtype=tf.float64)
    q_var=tf.random_normal([Q,Q],dtype=tf.float64)
    q_var=q_var+tf.transpose(q_var [1,0])+1e+1*np.eye(Q)
    K=q_var
    q_sqrt=tf.cholesky(q_var)
    q_sqrt=tf.expand_dims(q_sqrt,-1)
    num_latent=1
    s=tf.Session()
    s.run(tf.initialize_all_variables())
    '''
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    We assume num_latent independent distributions, given by the columns of
    q_mu and the last dimension of q_sqrt.

    q_mu is a matrix, each column contains a mean.

    q_sqrt is a 3D tensor, each matrix within is a lower triangular square-root
        matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.

    num_latent is an integer: the number of independent distributions (equal to
        the columns of q_mu and the last dim of q_sqrt).

    q_sqrt=tf.cholesky(K)
    L = tf.cholesky(q_sq)
    alpha = tf.matrix_triangular_solve(L, q_mu, lower=True)
    KL = 0.5 * tf.reduce_sum(tf.square(alpha))  # Mahalanobis term.
    KL +=   0.5 * tf.reduce_sum(
        tf.log(tf.square(tf.diag_part(L))))  # Prior log-det term.
    KL += -0.5 * tf.cast(tf.shape(q_sqrt)[0], tf.float64)

    Lq = tf.batch_matrix_band_part(q_sqrt, -1, 0)
    # Log determinant of q covariance:
    KL += -0.5*tf.reduce_sum(tf.log(tf.square(tf.diag_part(Lq))))
    LiLq = tf.matrix_triangular_solve(L, Lq, lower=True)
    KL += 0.5 * tf.reduce_sum(tf.square(LiLq))  # Trace term
    """
    V2=tf.cholesky(K)
    V1=tf.cholesky(q_sq)
    KL=h.Mul(tf.transpose(q_mu),tf.cholesky_solve(V2,q_mu))
    KL+=tf.trace(tf.cholesky_solve(V2,q_sq))
    KL-=h.get_dim(K,0)
    KL+=tf.reduce_sum(2*tf.log(tf.diag_part(V2))-2*tf.log(tf.diag_part(V1)))
    return KL/2
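The closed form being evaluated in the active branch (with $V_2 V_2^\top = K$, $V_1 V_1^\top = \Sigma_q$, and $d$ the dimension returned by `h.get_dim(K, 0)`) is the standard Gaussian KL divergence; as a sketch of the correspondence:

$$
\mathrm{KL}\big(\mathcal{N}(\mu, \Sigma_q)\,\big\|\,\mathcal{N}(0, K)\big)
= \tfrac12\Big(\mu^\top K^{-1}\mu \;+\; \mathrm{tr}\!\big(K^{-1}\Sigma_q\big) \;-\; d \;+\; \log|K| \;-\; \log|\Sigma_q|\Big),
$$

where the two log-determinants are read off the Cholesky diagonals, $\log|K| = 2\sum_i \log (V_2)_{ii}$ and $\log|\Sigma_q| = 2\sum_i \log (V_1)_{ii}$.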
Example #9
File: helper.py Project: blutooth/gp
def F_bound2_v2(y,S,Kmm,Knm,Kmnnm,Tr_Knn,sigma):
    #matrices to be used
    N=get_dim(y,0)
    Kmm_chol=tf.cholesky(Kmm)
    Q_nn=tf.square(sigma)*np.eye(N)+Mul(Knm,tf.cholesky_solve(Kmm_chol,tf.transpose(Knm)))
    bound=-0.5*(Tr_Knn-tf.trace(tf.cholesky_solve(Kmm_chol,Kmnnm)))/tf.square(sigma)
    bound+=multivariate_normal(y, tf.zeros([N,1],dtype=tf.float32), tf.cholesky(Q_nn))
    return bound
Example #10
File: helper.py Project: blutooth/gp
def log_det(Z):
    #conditioned=condition(Z)
    Z=(Z+tf.transpose(Z))/2
    chol = tf.cholesky(Z)
    logdet = 2*tf.reduce_sum(tf.log(tf.diag_part(chol)))
    return logdet
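The identity this helper relies on, for a symmetric positive-definite $Z$ with Cholesky factor $L$ (so $Z = LL^\top$):

$$
\log\lvert Z\rvert \;=\; \log\lvert L\rvert\,\lvert L^\top\rvert \;=\; 2\sum_i \log L_{ii}.
$$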
Example #11
def natural_to_meanvarsqrt(nat_1, nat_2):
    var_sqrt_inv = tf.cholesky(-2 * nat_2)
    var_sqrt = _inverse_lower_triangular(var_sqrt_inv)
    S = tf.matmul(var_sqrt, var_sqrt, transpose_a=True)
    mu = tf.matmul(S, nat_1)
    # We need the decomposition of S as L L^T, not as L^T L,
    # hence we need another cholesky.
    return mu, tf.cholesky(S)
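The conversion above follows the usual Gaussian natural-parameter relations (a sketch of the algebra, reusing the code's names):

$$
\theta_1 = \Sigma^{-1}\mu,\qquad \theta_2 = -\tfrac12\,\Sigma^{-1}
\quad\Longrightarrow\quad
\Sigma = (-2\,\theta_2)^{-1},\qquad \mu = \Sigma\,\theta_1,
$$

so `var_sqrt_inv` is a Cholesky factor of $\Sigma^{-1}$, its lower-triangular inverse is a square root of $\Sigma = S$, and the final `tf.cholesky(S)` re-factors $S$ as $LL^\top$ with $L$ lower triangular, as the comment notes.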
Example #12
def multivariate_gaussian_log_density(x, mu,
                                      Sigma=None, L=None,
                                      prec=None, L_prec=None):
    """
    Assume X is a single vector described by a multivariate Gaussian
    distribution with x ~ N(mu, Sigma).

    We accept parameterization in terms of the covariance matrix or
    its cholesky decomposition L (more efficient if available), or the
    precision matrix or its cholesky decomposition L_prec.
    The latter is useful when representing a Gaussian in its natural 
    parameterization. Note that we still require the explicit mean mu
    (not the natural parameter prec*mu) since I'm too lazy to cover
    all the permutations of possible arguments (though this should be
    straightforward). 

    """
    s = extract_shape(x)
    try:
        n, = s
    except:
        n, m = s
        assert(m==1)

    if L is None and Sigma is not None:
        L = tf.cholesky(Sigma)        
    if L_prec is None and prec is not None:
        L_prec = tf.cholesky(prec)
        
    if L is not None:
        neg_half_logdet = -tf.reduce_sum(tf.log(tf.diag_part(L)))
    else:
        assert(L_prec is not None)
        neg_half_logdet = tf.reduce_sum(tf.log(tf.diag_part(L_prec)))
        
    d = tf.reshape(x - mu, (n,1))
    if L is not None:
        alpha = tf.matrix_triangular_solve(L, d, lower=True)
        exponential_part= tf.reduce_sum(tf.square(alpha))
    elif prec is not None:
        d = tf.reshape(d, (n, 1))
        exponential_part = tf.reduce_sum(d * tf.matmul(prec, d))
    else:
        assert(L_prec is not None)
        d = tf.reshape(d, (1, n))
        alpha = tf.matmul(d, L_prec)
        exponential_part= tf.reduce_sum(tf.square(alpha))

    n_log2pi = n * 1.83787706641
    logp =  -0.5 * n_log2pi
    logp += neg_half_logdet
    logp += -0.5 * exponential_part
        
    return logp
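For orientation, the quantity assembled here is the usual multivariate Gaussian log density; in the covariance parameterization, with $LL^\top = \Sigma$ and $\alpha = L^{-1}(x - \mu)$ (a reconstruction, not part of the quoted source):

$$
\log \mathcal{N}(x \mid \mu, \Sigma)
= -\tfrac{n}{2}\log 2\pi \;-\; \sum_i \log L_{ii} \;-\; \tfrac12\,\alpha^\top\alpha,
$$

with the constant $n \log 2\pi$ appearing in the code as `n * 1.83787706641`, and the sign of the half-log-determinant flipping when the precision factor `L_prec` is supplied instead.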
Example #13
def Bound1(y,S,Kmm,Knm,Tr_Knn,sigma):
#matrices to be used
    Kmm_chol=tf.cholesky(Kmm)
    sig_2=tf.square(sigma)
    N=h.get_dim(y,0)
    Q_nn=h.Mul(Knm,tf.cholesky_solve(Kmm_chol,tf.transpose(Knm)))
    Q_I_chol=tf.cholesky(sig_2*np.eye(N)+Q_nn)
    bound=-0.5*(Tr_Knn-Q_nn)/sig_2
    bound+=h.multivariate_normal(y, tf.zeros([N,1],dtype=tf.float32), Q_I_chol)
    bound-=0.5*tf.reduce_sum(S)/sig_2+0.1*0.5*tf.reduce_sum(tf.log(S))
    return bound
Example #14
File: gpr.py Project: erenis/GPflow
    def build_predict(self, Xnew, full_cov=False):
        """
        Xnew is a data matrix, point at which we want to predict

        This method computes

            p(F* | Y )

        where F* are points on the GP at Xnew, Y are noisy observations at X.

        """
        Kx = self.kern.K(self.X, Xnew)
        K = self.kern.K(self.X) + eye(self.num_data) * self.likelihood.variance
        L = tf.cholesky(K)
        A = tf.matrix_triangular_solve(L, Kx, lower=True)
        V = tf.matrix_triangular_solve(L, self.Y - self.mean_function(self.X))
        fmean = tf.matmul(tf.transpose(A), V) + self.mean_function(Xnew)
        if full_cov:
            fvar = self.kern.K(Xnew) - tf.matmul(tf.transpose(A), A)
            shape = tf.pack([1, 1, tf.shape(self.Y)[1]])
            fvar = tf.tile(tf.expand_dims(fvar, 2), shape)
        else:
            fvar = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
            fvar = tf.tile(tf.reshape(fvar, (-1, 1)), [1, self.Y.shape[1]])
        return fmean, fvar
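Written out, the exact GP posterior computed above (a reconstruction; $LL^\top = K_{ff} + \sigma^2 I$, $A = L^{-1}K_{f*}$, $V = L^{-1}(Y - m(X))$):

$$
\begin{aligned}
\mathbb{E}[F_*] &= A^\top V + m(X_{\mathrm{new}}) = K_{*f}\big(K_{ff} + \sigma^2 I\big)^{-1}\big(Y - m(X)\big) + m(X_{\mathrm{new}}),\\
\mathrm{Var}[F_*] &= K_{**} - A^\top A = K_{**} - K_{*f}\big(K_{ff} + \sigma^2 I\big)^{-1} K_{f*}.
\end{aligned}
$$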
Example #15
def gauss_kl_diag(q_mu, q_sqrt, K,  num_latent):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    We assume num_latent independent distributions, given by the columns of
    q_mu and q_sqrt.

    q_mu is a matrix, each column contains a mean

    q_sqrt is a matrix, each column represents the diagonal of a square-root
        matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.

    num_latent is an integer: the number of independent distributions (equal to
        the columns of q_mu and q_sqrt).
    """
    L = tf.cholesky(K)
    alpha = tf.matrix_triangular_solve(L, q_mu, lower=True)
    KL = 0.5 * tf.reduce_sum(tf.square(alpha))  # Mahalanobis term.
    KL += num_latent * 0.5 * tf.reduce_sum(
        tf.log(tf.square(tf.diag_part(L))))  # Prior log-det term.
    KL += -0.5 * tf.cast(tf.shape(q_sqrt)[0] * num_latent, tf.float64)
    KL += -0.5 * tf.reduce_sum(tf.log(tf.square(q_sqrt)))  # Log-det of q-cov
    L_inv = tf.matrix_triangular_solve(L, eye(tf.shape(L)[0]), lower=True)
    K_inv = tf.matrix_triangular_solve(tf.transpose(L), L_inv, lower=False)
    KL += 0.5 * tf.reduce_sum(tf.expand_dims(tf.diag_part(K_inv), 1)
                              * tf.square(q_sqrt))  # Trace term.
    return KL
Example #16
File: vgp.py Project: sanket-kamthe/GPflow
    def _build_likelihood(self):
        """
        q_alpha, q_lambda are variational parameters, size N x R
        This method computes the variational lower bound on the likelihood,
        which is:
            E_{q(F)} [ \log p(Y|F) ] - KL[ q(F) || p(F)]
        with
            q(f) = N(f | K alpha + mean, [K^-1 + diag(square(lambda))]^-1) .
        """
        K = self.kern.K(self.X)
        K_alpha = tf.matmul(K, self.q_alpha)
        f_mean = K_alpha + self.mean_function(self.X)

        # compute the variance for each of the outputs
        I = tf.tile(tf.expand_dims(tf.eye(self.num_data, dtype=settings.float_type), 0),
                    [self.num_latent, 1, 1])
        A = I + tf.expand_dims(tf.transpose(self.q_lambda), 1) * \
            tf.expand_dims(tf.transpose(self.q_lambda), 2) * K
        L = tf.cholesky(A)
        Li = tf.matrix_triangular_solve(L, I)
        tmp = Li / tf.expand_dims(tf.transpose(self.q_lambda), 1)
        f_var = 1. / tf.square(self.q_lambda) - tf.transpose(tf.reduce_sum(tf.square(tmp), 1))

        # some statistics about A are used in the KL
        A_logdet = 2.0 * tf.reduce_sum(tf.log(tf.matrix_diag_part(L)))
        trAi = tf.reduce_sum(tf.square(Li))

        KL = 0.5 * (A_logdet + trAi - self.num_data * self.num_latent +
                    tf.reduce_sum(K_alpha * self.q_alpha))

        v_exp = self.likelihood.variational_expectations(f_mean, f_var, self.Y)
        return tf.reduce_sum(v_exp) - KL
Example #17
File: laplace.py Project: wujsAct/edward
  def initialize(self, *args, **kwargs):
    # Store latent variables in a temporary attribute; MAP will
    # optimize `PointMass` random variables, which subsequently
    # optimizes mean parameters of the normal approximations.
    latent_vars_normal = self.latent_vars.copy()
    self.latent_vars = {z: PointMass(params=qz.loc)
                        for z, qz in six.iteritems(latent_vars_normal)}

    super(Laplace, self).initialize(*args, **kwargs)

    hessians = tf.hessians(self.loss, list(six.itervalues(self.latent_vars)))
    self.finalize_ops = []
    for z, hessian in zip(six.iterkeys(self.latent_vars), hessians):
      qz = latent_vars_normal[z]
      if isinstance(qz, (MultivariateNormalDiag, Normal)):
        scale_var = get_variables(qz.variance())[0]
        scale = 1.0 / tf.diag_part(hessian)
      else:  # qz is MultivariateNormalTriL
        scale_var = get_variables(qz.covariance())[0]
        scale = tf.matrix_inverse(tf.cholesky(hessian))

      self.finalize_ops.append(scale_var.assign(scale))

    self.latent_vars = latent_vars_normal.copy()
    del latent_vars_normal
Example #18
File: normal.py Project: JoyceYa/edward
def main(_):
  ed.set_seed(42)

  # MODEL
  z = MultivariateNormalTriL(
      loc=tf.ones(2),
      scale_tril=tf.cholesky(tf.constant([[1.0, 0.8], [0.8, 1.0]])))

  # INFERENCE
  qz = Empirical(params=tf.get_variable("qz/params", [1000, 2]))

  inference = ed.HMC({z: qz})
  inference.run()

  # CRITICISM
  sess = ed.get_session()
  mean, stddev = sess.run([qz.mean(), qz.stddev()])
  print("Inferred posterior mean:")
  print(mean)
  print("Inferred posterior stddev:")
  print(stddev)

  fig, ax = plt.subplots()
  trace = sess.run(qz.params)
  ax.scatter(trace[:, 0], trace[:, 1], marker=".")
  mvn_plot_contours(z, ax=ax)
  plt.show()
Example #19
def gauss_kl(q_mu, q_sqrt, K):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    We assume multiple independent distributions, given by the columns of
    q_mu and the last dimension of q_sqrt.

    q_mu is a matrix, each column contains a mean.

    q_sqrt is a 3D tensor, each matrix within is a lower triangular square-root
        matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.
    """
    L = tf.cholesky(K)
    alpha = tf.matrix_triangular_solve(L, q_mu, lower=True)
    KL = 0.5 * tf.reduce_sum(tf.square(alpha))  # Mahalanobis term.
    num_latent = tf.cast(tf.shape(q_sqrt)[2], float_type)
    KL += num_latent * 0.5 * tf.reduce_sum(tf.log(tf.square(tf.diag_part(L))))  # Prior log-det term.
    KL += -0.5 * tf.cast(tf.reduce_prod(tf.shape(q_sqrt)[1:]), float_type)  # constant term
    Lq = tf.matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1, 0)  # force lower triangle
    KL += -0.5*tf.reduce_sum(tf.log(tf.square(tf.matrix_diag_part(Lq))))  # logdet
    L_tiled = tf.tile(tf.expand_dims(L, 0), tf.pack([tf.shape(Lq)[0], 1, 1]))
    LiLq = tf.matrix_triangular_solve(L_tiled, Lq, lower=True)
    KL += 0.5 * tf.reduce_sum(tf.square(LiLq))  # Trace term
    return KL
Example #20
  def runFiniteDifferences(self,
                           shapes,
                           dtypes=(tf.float32, tf.float64),
                           scalarTest=False):
    with self.test_session(use_gpu=False):
      for shape in shapes:
        for batch in False, True:
          for dtype in dtypes:
            if not scalarTest:
              x = tf.constant(np.random.randn(shape[0], shape[1]), dtype)
              tensor = tf.matmul(x, tf.transpose(x)) / shape[0]
            else:
              # This is designed to be a faster test for larger matrices.
              x = tf.constant(np.random.randn(), dtype)
              R = tf.constant(np.random.randn(shape[0], shape[1]), dtype)
              e = tf.mul(R, x)
              tensor = tf.matmul(e, tf.transpose(e)) / shape[0]

            # Inner-most matrices in tensor are positive definite.
            if batch:
              tensor = tf.tile(tf.expand_dims(tensor, 0), [4, 1, 1])
            y = tf.cholesky(tensor)
            if scalarTest:
              y = tf.reduce_mean(y)
            error = tf.test.compute_gradient_error(x, x._shape_as_list(), y,
                                                   y._shape_as_list())
            tf.logging.info("error = %f", error)
            if dtype == tf.float64:
              self.assertLess(error, 1e-5)
            else:
              self.assertLess(error, 3e-3)
Example #21
    def test_whiten(self):
        """
        make sure that predicting using the whitened representation is the
        same as the non-whitened one.
        """
        with self.test_context() as sess:
            rng = np.random.RandomState(0)
            Xs, X, F, k, num_data, feed_dict = self.prepare()
            k.compile(session=sess)

            F_sqrt = tf.placeholder(settings.float_type, [num_data, 1])
            F_sqrt_data = rng.rand(num_data, 1)
            feed_dict[F_sqrt] = F_sqrt_data

            K = k.K(X)
            L = tf.cholesky(K)
            V = tf.matrix_triangular_solve(L, F, lower=True)
            V_sqrt = tf.matrix_triangular_solve(L, tf.diag(F_sqrt[:, 0]), lower=True)[None, :, :]

            Fstar_mean, Fstar_var = gpflow.conditionals.conditional(
                Xs, X, k, F, q_sqrt=F_sqrt)
            Fstar_w_mean, Fstar_w_var = gpflow.conditionals.conditional(
                Xs, X, k, V, q_sqrt=V_sqrt, white=True)

            mean_difference = sess.run(Fstar_w_mean - Fstar_mean, feed_dict=feed_dict)
            var_difference = sess.run(Fstar_w_var - Fstar_var, feed_dict=feed_dict)

            assert_allclose(mean_difference, 0, atol=4)
            assert_allclose(var_difference, 0, atol=4)
Example #22
def gauss_kl(q_mu, q_sqrt, K, num_latent):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    We assume num_latent independent distributions, given by the columns of
    q_mu and the last dimension of q_sqrt.

    q_mu is a matrix, each column contains a mean.

    q_sqrt is a 3D tensor, each matrix within is a lower triangular square-root
        matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.

    num_latent is an integer: the number of independent distributions (equal to
        the columns of q_mu and the last dim of q_sqrt).
    """
    L = tf.cholesky(K)
    alpha = tf.matrix_triangular_solve(L, q_mu, lower=True)
    KL = 0.5 * tf.reduce_sum(tf.square(alpha))  # Mahalanobis term.
    KL += num_latent * 0.5 * tf.reduce_sum(
        tf.log(tf.square(tf.diag_part(L))))  # Prior log-det term.
    KL += -0.5 * tf.cast(tf.shape(q_sqrt)[0] * num_latent, tf.float64)
    for d in range(num_latent):
        Lq = tf.batch_matrix_band_part(q_sqrt[:, :, d], -1, 0)
        # Log determinant of q covariance:
        KL += -0.5*tf.reduce_sum(tf.log(tf.square(tf.diag_part(Lq))))
        LiLq = tf.matrix_triangular_solve(L, Lq, lower=True)
        KL += 0.5 * tf.reduce_sum(tf.square(LiLq))  # Trace term
    return KL
Example #23
def _expectation(p, mean, none, kern, feat, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <x_n K_{x_n, Z}>_p(x_n)
        - K_{.,.} :: RBF kernel

    :return: NxDxM
    """
    Xmu, Xcov = p.mu, p.cov

    with tf.control_dependencies([tf.assert_equal(
            tf.shape(Xmu)[1], tf.constant(kern.input_dim, settings.tf_int),
            message="Currently cannot handle slicing in exKxz.")]):
        Xmu = tf.identity(Xmu)

    with params_as_tensors_for(kern), params_as_tensors_for(feat):
        D = tf.shape(Xmu)[1]
        lengthscales = kern.lengthscales if kern.ARD \
            else tf.zeros((D,), dtype=settings.float_type) + kern.lengthscales

        chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(lengthscales ** 2) + Xcov)  # NxDxD
        all_diffs = tf.transpose(feat.Z) - tf.expand_dims(Xmu, 2)  # NxDxM

        sqrt_det_L = tf.reduce_prod(lengthscales)
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N

        exponent_mahalanobis = tf.cholesky_solve(chol_L_plus_Xcov, all_diffs)  # NxDxM
        non_exponent_term = tf.matmul(Xcov, exponent_mahalanobis, transpose_a=True)
        non_exponent_term = tf.expand_dims(Xmu, 2) + non_exponent_term  # NxDxM

        exponent_mahalanobis = tf.reduce_sum(all_diffs * exponent_mahalanobis, 1)  # NxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxM

        return kern.variance * (determinants[:, None] * exponent_mahalanobis)[:, None, :] * non_exponent_term
Example #24
File: vgp.py Project: agarbuno/GPflow
    def build_likelihood(self):
        """
        q_alpha, q_lambda are variational parameters, size N x R

        This method computes the variational lower bound on the likelihood, which is:

            E_{q(F)} [ \log p(Y|F) ] - KL[ q(F) || p(F)]

        with

            q(f) = N(f | K alpha, [K^-1 + diag(square(lambda))]^-1) .

        """
        K = self.kern.K(self.X)
        f_mean = tf.matmul(K, self.q_alpha) + self.mean_function(self.X)
        #for each of the data-dimensions (columns of Y), find the diagonal of the
        #variance, and also relevant parts of the KL.
        f_var, A_logdet, trAi = [], tf.zeros((1,), tf.float64), tf.zeros((1,), tf.float64)
        for d in range(self.num_latent):
            b = self.q_lambda[:,d]
            B = tf.expand_dims(b, 1)
            A = eye(self.num_data) + K*B*tf.transpose(B)
            L = tf.cholesky(A)
            Li = tf.matrix_triangular_solve(L, eye(self.num_data), lower=True)
            LiBi = Li / b
            #full_sigma:return tf.diag(b**-2) - LiBi.T.dot(LiBi)
            f_var.append(1./tf.square(b) - tf.reduce_sum(tf.square(LiBi),0))
            A_logdet += 2*tf.reduce_sum(tf.log(tf.user_ops.get_diag(L)))
            trAi += tf.reduce_sum(tf.square(Li))

        f_var = tf.transpose(tf.pack(f_var))

        KL = 0.5*(A_logdet + trAi - self.num_data*self.num_latent + tf.reduce_sum(f_mean*self.q_alpha))

        return tf.reduce_sum(self.likelihood.variational_expectations(f_mean, f_var, self.Y)) - KL
Example #25
    def __init__(self, mean, cov, d=None):

        mean = tf.convert_to_tensor(mean)
        cov = tf.convert_to_tensor(cov)
        
        try:
            d1, = util.extract_shape(mean)
            mean = tf.reshape(mean, (d1,1))
        except:
            d1,k = util.extract_shape(mean)
            assert(k == 1)

        d2,_ = util.extract_shape(cov)
        assert(d1==d2)
        if d is None:
            d = d1
        else:
            assert(d==d1)
            
        super(MVGaussianMeanCov, self).__init__(d=d)
        
        self._mean = mean
        self._cov = cov
        
        self._L_cov = tf.cholesky(cov)
        self._entropy = bf.dists.multivariate_gaussian_entropy(L=self._L_cov)

        L_prec_transpose = util.triangular_inv(self._L_cov)
        self._L_prec = tf.transpose(L_prec_transpose)
        self._prec = tf.matmul(self._L_prec, L_prec_transpose)
        self._prec_mean = tf.matmul(self._prec, self._mean)
Example #26
def gp_predict_whitened(Xnew, X, kern, V):
    """
    Given a whitened representation of the GP at the points X (V), produce the
    mean and variance of the GP at the points Xnew (F*).

    The GP has been centered (whitened) so that 

        p(v) = N( 0, I)
        f = L v ,

    and so

        p(f) = N(0, LL^T) = N(0, K).

    We assume K independent GPs, represented by the columns of V. The GP conditional is:
    
        p(F*[:,i] | V[:,i]) = N (K_{*f} L^{-T} V[:,i],  K_{**} - K_{*f}L^{-1} L^{-T} K_{f*})

    Xnew is a data matrix, size N* x D
    X is a data matrix, size N x D
    V is a matrix containing whitened GP values, size N x K

    See also:
        gaussian_gp_predict_whitened -- where there is no uncertainty in V
        gp_predict -- same, without the whitening
    """
    Kd = kern.Kdiag(Xnew)
    Kx = kern.K(X, Xnew)
    K = kern.K(X)
    L = tf.cholesky(K)
    A = tf.user_ops.triangular_solve(L, Kx, 'lower')
    fmean = tf.matmul(tf.transpose(A), V)
    fvar = Kd - tf.reduce_sum(tf.square(A), 0)
    return fmean, tf.expand_dims(fvar, 1) * tf.ones_like(V[0,:])
Example #27
File: vgp.py Project: sanket-kamthe/GPflow
    def _build_predict(self, Xnew, full_cov=False):
        """
        The posterior variance of F is given by
            q(f) = N(f | K alpha + mean, [K^-1 + diag(lambda**2)]^-1)
        Here we project this to F*, the values of the GP at Xnew which is given
        by
           q(F*) = N ( F* | K_{*F} alpha + mean, K_{**} - K_{*f}[K_{ff} +
                                           diag(lambda**-2)]^-1 K_{f*} )
        """

        # compute kernel things
        Kx = self.kern.K(self.X, Xnew)
        K = self.kern.K(self.X)

        # predictive mean
        f_mean = tf.matmul(Kx, self.q_alpha, transpose_a=True) + self.mean_function(Xnew)

        # predictive var
        A = K + tf.matrix_diag(tf.transpose(1. / tf.square(self.q_lambda)))
        L = tf.cholesky(A)
        Kx_tiled = tf.tile(tf.expand_dims(Kx, 0), [self.num_latent, 1, 1])
        LiKx = tf.matrix_triangular_solve(L, Kx_tiled)
        if full_cov:
            f_var = self.kern.K(Xnew) - tf.matmul(LiKx, LiKx, transpose_a=True)
        else:
            f_var = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(LiKx), 1)
        return f_mean, tf.transpose(f_var)
Example #28
def _expectation(p, kern, feat, none1, none2, nghp=None):
    """
    Compute the expectation:
    <K_{X, Z}>_p(X)
        - K_{.,.} :: RBF kernel

    :return: NxM
    """
    with params_as_tensors_for(kern), params_as_tensors_for(feat):
        # use only active dimensions
        Xcov = kern._slice_cov(p.cov)
        Z, Xmu = kern._slice(feat.Z, p.mu)
        D = tf.shape(Xmu)[1]
        if kern.ARD:
            lengthscales = kern.lengthscales
        else:
            lengthscales = tf.zeros((D,), dtype=settings.tf_float) + kern.lengthscales

        chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(lengthscales ** 2) + Xcov)  # NxDxD

        all_diffs = tf.transpose(Z) - tf.expand_dims(Xmu, 2)  # NxDxM
        exponent_mahalanobis = tf.matrix_triangular_solve(chol_L_plus_Xcov, all_diffs, lower=True)  # NxDxM
        exponent_mahalanobis = tf.reduce_sum(tf.square(exponent_mahalanobis), 1)  # NxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxM

        sqrt_det_L = tf.reduce_prod(lengthscales)
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N

        return kern.variance * (determinants[:, None] * exponent_mahalanobis)
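For reference, the closed form implemented here is the Gaussian expectation of an RBF kernel (a reconstruction; $\Lambda = \mathrm{diag}(\ell^2)$ holds the squared lengthscales and $\sigma_k^2$ is the kernel variance):

$$
\big\langle k(x, z)\big\rangle_{\mathcal{N}(x \mid \mu, \Sigma)}
= \sigma_k^2\,\frac{|\Lambda|^{1/2}}{|\Lambda + \Sigma|^{1/2}}\,
\exp\!\Big(-\tfrac12\,(z-\mu)^\top(\Lambda+\Sigma)^{-1}(z-\mu)\Big),
$$

with both the determinant ratio and the Mahalanobis term obtained from the Cholesky factor of $\Lambda + \Sigma$ computed above.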
Example #29
File: vgp.py Project: agarbuno/GPflow
    def build_predict(self, Xnew, full_cov=False):
        """
        The posterior variance of F is given by

            q(f) = N(f | K alpha, [K^-1 + diag(lambda**2)]^-1)

        Here we project this to F*, the values of the GP at Xnew which is given by

           q(F*) = N ( F* | K_{*F} alpha , K_{**} - K_{*f}[K_{ff} + diag(lambda**-2)]^-1 K_{f*} )

        """

        #compute kernelly things
        Kx = self.kern.K(Xnew, self.X)
        K = self.kern.K(self.X)


        #predictive mean
        f_mean = tf.matmul(Kx, self.q_alpha) + self.mean_function(Xnew)

        #predictive var
        f_var = []
        for d in range(self.num_latent):
            b = self.q_lambda[:,d]
            A = K + tf.diag(1./tf.square(b))
            L = tf.cholesky(A)
            LiKx = tf.matrix_triangular_solve(L, tf.transpose(Kx), lower=True)
            if full_cov:
                f_var.append( self.kern.K(Xnew)- tf.matmul(tf.transpose(LiKx),LiKx) )
            else:
                f_var.append( self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(LiKx),0) )
        f_var = tf.pack(f_var)
        return f_mean, tf.transpose(f_var)
Example #30
    def logpdf(self, x, mean=None, cov=1):
        """Log of the probability density function.

        Parameters
        ----------
        x : tf.Tensor
            A 1-D or 2-D tensor.
        mean : tf.Tensor, optional
            A 1-D tensor. Defaults to zero mean.
        cov : tf.Tensor, optional
            A 1-D or 2-D tensor. Defaults to identity matrix.

        Returns
        -------
        tf.Tensor
            A tensor of one dimension less than the input.
        """
        x = tf.cast(x, dtype=tf.float32)
        x_shape = get_dims(x)
        if len(x_shape) == 1:
            d = x_shape[0]
        else:
            d = x_shape[1]

        if mean is None:
            r = x
        else:
            mean = tf.cast(mean, dtype=tf.float32)
            r = x - mean

        if cov is 1:
            L_inv = tf.diag(tf.ones([d]))
            det_cov = tf.constant(1.0)
        else:
            cov = tf.cast(cov, dtype=tf.float32)
            if len(cov.get_shape()) == 1: # vector
                L_inv = tf.diag(1.0 / tf.sqrt(cov))
                det_cov = tf.reduce_prod(cov)
            else: # matrix
                L = tf.cholesky(cov)
                L_inv = tf.matrix_inverse(L)
                det_cov = tf.pow(tf.reduce_prod(tf.diag_part(L)), 2)

        lps = -0.5*d*tf.log(2*np.pi) - 0.5*tf.log(det_cov)
        if len(x_shape) == 1: # vector
            r = tf.reshape(r, shape=(d, 1))
            inner = tf.matmul(L_inv, r)
            lps -= 0.5 * tf.matmul(inner, inner, transpose_a=True)
            return tf.squeeze(lps)
        else: # matrix
            # TODO vectorize further
            out = []
            for r_vec in tf.unpack(r):
                r_vec = tf.reshape(r_vec, shape=(d, 1))
                inner = tf.matmul(L_inv, r_vec)
                out += [tf.squeeze(lps -
                        0.5 * tf.matmul(inner, inner, transpose_a=True))]

            return tf.pack(out)
Example #31
 def _grad_and_hessian_loss_fn(x):
     loss = _neg_log_likelihood(x)
     grad_loss = tf.gradients(loss, [x])[0]
     hessian_loss = tf.hessians(loss, [x])[0]
     hessian_chol = tf.cholesky(hessian_loss)
     return grad_loss, hessian_chol, tf.ones_like(grad_loss)
varianceM52_pre = tf.Variable(np.log(np.exp(0.1) - 1), dtype=tf.float32)

lengthscaleM52 = tf.nn.softplus(lengthscaleM52_pre)
varianceM52 = tf.nn.softplus(varianceM52_pre)

period_pre = tf.Variable(np.log(np.exp(7.0 * len_init) - 1), dtype=tf.float32)
period_len_pre = tf.Variable(1.0)
period_var_pre = tf.Variable(np.log(np.exp(0.5) - 1), dtype=tf.float32)  #

period = tf.nn.softplus(period_pre)
period_length = tf.nn.softplus(period_len_pre)

Kuu = kernelfx(xu, xu)

fu_loc = tf.zeros((p, m))
fu_scale = tf.cast(tf.cholesky(Kuu + offset * tf.eye(m, dtype=tf.float64),
                               name='fu_scale'),
                   dtype=tf.float32)

u = MultivariateNormalTriL(loc=fu_loc, scale_tril=fu_scale, name='pu')
x = Normal(loc=tf.zeros((M, Q)), scale=1.0)

Kfu = kernelfx(x, xu)

Kff = kernelfx(x, x)

Kuuinv = tf.matrix_inverse(Kuu + offset * tf.eye(m, dtype=tf.float64))
KfuKuuinv = tf.matmul(Kfu, Kuuinv)
KffKuuinvU = [
    tf.reshape(
        tf.matmul(KfuKuuinv,
                  tf.expand_dims(tf.cast(u[i], dtype=tf.float64), axis=1)),
Example #33
 def testNonSquareMatrix(self):
     with self.assertRaises(ValueError):
         tf.cholesky(np.array([[1., 2., 3.], [3., 4., 5.]]))
Example #34
def independent_interdomain_conditional(Kmn, Kmm, Knn, f, *, full_cov=False, full_output_cov=False,
                                        q_sqrt=None, white=False):
    """
    The inducing outputs live in the g-space (R^L).
    Interdomain conditional calculation.

    :param Kmn: M x L x N x P
    :param Kmm: L x M x M
    :param Knn: N x P  or  N x N  or  P x N x N  or  N x P x N x P
    :param f: data matrix, M x L
    :param q_sqrt: L x M x M  or  M x L
    :param full_cov: calculate covariance between inputs
    :param full_output_cov: calculate covariance between outputs
    :param white: use whitened representation
    :return:
        - mean: N x P
        - variance: N x P, N x P x P, P x N x N, N x P x N x P
    """
    logger.debug("independent_interdomain_conditional")
    M, L, N, P = [tf.shape(Kmn)[i] for i in range(Kmn.shape.ndims)]

    Lm = tf.cholesky(Kmm)  # L x M x M

    # Compute the projection matrix A
    Kmn = tf.reshape(tf.transpose(Kmn, (1, 0, 2, 3)), (L, M, N * P))
    A = tf.matrix_triangular_solve(Lm, Kmn, lower=True)  # L x M x M  *  L x M x NP  ->  L x M x NP
    Ar = tf.reshape(A, (L, M, N, P))

    # compute the covariance due to the conditioning
    if full_cov and full_output_cov:
        fvar = Knn - tf.tensordot(Ar, Ar, [[0, 1], [0, 1]])  # N x P x N x P
    elif full_cov and not full_output_cov:
        At = tf.reshape(tf.transpose(Ar), (P, N, M * L))  # P x N x ML
        fvar = Knn - tf.matmul(At, At, transpose_b=True)  # P x N x N
    elif not full_cov and full_output_cov:
        At = tf.reshape(tf.transpose(Ar, [2, 3, 1, 0]), (N, P, M * L))  # N x P x ML
        fvar = Knn - tf.matmul(At, At, transpose_b=True)  # N x P x P
    elif not full_cov and not full_output_cov:
        fvar = Knn - tf.reshape(tf.reduce_sum(tf.square(A), [0, 1]), (N, P))  # Knn: N x P

    # another backsubstitution in the unwhitened case
    if not white:
        A = tf.matrix_triangular_solve(Lm, Ar)  # L x M x M  *  L x M x NP  ->  L x M x NP
        Ar = tf.reshape(A, (L, M, N, P))

    fmean = tf.tensordot(Ar, f, [[1, 0], [0, 1]])  # N x P

    if q_sqrt is not None:
        if q_sqrt.shape.ndims == 3:
            Lf = tf.matrix_band_part(q_sqrt, -1, 0)  # L x M x M
            LTA = tf.matmul(Lf, A, transpose_a=True)  # L x M x M  *  L x M x NP  ->  L x M x NP
        else:  # q_sqrt M x L
            LTA = (A * tf.transpose(q_sqrt)[..., None])  # L x M x NP

        if full_cov and full_output_cov:
            LTAr = tf.reshape(LTA, (L * M, N * P))
            fvar = fvar + tf.reshape(tf.matmul(LTAr, LTAr, transpose_a=True), (N, P, N, P))
        elif full_cov and not full_output_cov:
            LTAr = tf.transpose(tf.reshape(LTA, (L * M, N, P)), [2, 0, 1])  # P x LM x N
            fvar = fvar + tf.matmul(LTAr, LTAr, transpose_a=True)  # P x N x N
        elif not full_cov and full_output_cov:
            LTAr = tf.transpose(tf.reshape(LTA, (L * M, N, P)), [1, 0, 2])  # N x LM x P
            fvar = fvar + tf.matmul(LTAr, LTAr, transpose_a=True)  # N x P x P
        elif not full_cov and not full_output_cov:
            fvar = fvar + tf.reshape(tf.reduce_sum(tf.square(LTA), (0, 1)), (N, P))
    return fmean, fvar
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops
ops.reset_default_graph()
sess = tf.Session()
x_vals = np.linspace(0, 10, 100)
y_vals = x_vals + np.random.normal(0, 1, 100)
x_vals_column = np.transpose(np.matrix(x_vals))
ones_column = np.transpose(np.matrix(np.repeat(1, 100)))
A = np.column_stack((x_vals_column, ones_column))
b = np.transpose(np.matrix(y_vals))
A_tensor = tf.constant(A)
b_tensor = tf.constant(b)

# print(A_tensor)
tA_A = tf.matmul(tf.transpose(A_tensor), A_tensor)
L = tf.cholesky(tA_A)
tA_b = tf.matmul(tf.transpose(A_tensor), b)
sol1 = tf.matrix_solve(L, tA_b)
sol2 = tf.matrix_solve(tf.transpose(L), sol1)

solution_eval = sess.run(sol2)
slope = solution_eval[0][0]
y_intercept = solution_eval[1][0]

print('slope = ' + str(slope))
print('y_intercept = ' + str(y_intercept))

best_fit = []
for i in x_vals:
    best_fit.append(slope * i + y_intercept)
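As a quick sanity check (hypothetical, not part of the original snippet), the same coefficients can be recovered with NumPy's least-squares solver and compared against the Cholesky route above:

# Hypothetical cross-check of the Cholesky solution against NumPy's lstsq.
lstsq_sol, _, _, _ = np.linalg.lstsq(np.asarray(A), np.asarray(b), rcond=None)
print('numpy slope       = ' + str(lstsq_sol[0][0]))
print('numpy y_intercept = ' + str(lstsq_sol[1][0]))
# Both should match `slope` and `y_intercept` up to numerical round-off.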
Example #36
  def build_model(self):
    """Defines the GP model.

    The loss is computed for partial feedback settings (bandits), so only
    the observed outcome is backpropagated (see weighted loss).
    Selects the optimizer and, finally, it also initializes the graph.
    """

    LOGGING.info("Initializing model %s.", self.name)
    self.global_step = tf.train.get_or_create_global_step()

    # Define state for the model (inputs, etc.)
    self.x_train = tf.get_variable(
        "training_data",
        initializer=tf.ones(
            [self.hparams.batch_size, self.n_in], dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.y_train = tf.get_variable(
        "training_labels",
        initializer=tf.zeros([self.hparams.batch_size, 1], dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.weights_train = tf.get_variable(
        "weights_train",
        initializer=tf.ones(
            [self.hparams.batch_size, self.n_out], dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.input_op = tf.assign(self.x_train, self.x_in, validate_shape=False)
    self.input_w_op = tf.assign(
        self.weights_train, self.weights, validate_shape=False)

    self.input_std = tf.get_variable(
        "data_standard_deviation",
        initializer=tf.ones([1, self.n_out], dtype=tf.float64),
        dtype=tf.float64,
        trainable=False)
    self.input_mean = tf.get_variable(
        "data_mean",
        initializer=tf.zeros([1, self.n_out], dtype=tf.float64),
        dtype=tf.float64,
        trainable=True)

    # GP Hyperparameters
    self.noise = tf.get_variable(
        "noise", initializer=tf.cast(0.0, dtype=tf.float64))
    self.amplitude = tf.get_variable(
        "amplitude", initializer=tf.cast(1.0, dtype=tf.float64))
    self.amplitude_linear = tf.get_variable(
        "linear_amplitude", initializer=tf.cast(1.0, dtype=tf.float64))
    self.length_scales = tf.get_variable(
        "length_scales", initializer=tf.zeros([1, self.n_in], dtype=tf.float64))
    self.length_scales_lin = tf.get_variable(
        "length_scales_linear",
        initializer=tf.zeros([1, self.n_in], dtype=tf.float64))

    # Latent embeddings of the different outputs for task covariance
    self.task_vectors = tf.get_variable(
        "latent_task_vectors",
        initializer=tf.random_normal(
            [self.n_out, self.task_latent_dim], dtype=tf.float64))

    # Normalize outputs across each dimension
    # Since we have different numbers of observations across each task, we
    # normalize by their respective counts.
    index_counts = self.atleast_2d(tf.reduce_sum(self.weights, axis=0),
                                   self.n_out)
    index_counts = tf.where(index_counts > 0, index_counts,
                            tf.ones(tf.shape(index_counts), dtype=tf.float64))
    self.mean_op = tf.assign(self.input_mean,
                             tf.reduce_sum(self.y, axis=0) / index_counts)
    self.var_op = tf.assign(
        self.input_std, tf.sqrt(1e-4 + tf.reduce_sum(tf.square(
            self.y - tf.reduce_sum(self.y, axis=0) / index_counts), axis=0)
                                / index_counts))

    with tf.control_dependencies([self.var_op]):
      y_normed = self.atleast_2d(
          (self.y - self.input_mean) / self.input_std, self.n_out)
      y_normed = self.atleast_2d(tf.boolean_mask(y_normed, self.weights > 0), 1)
    self.out_op = tf.assign(self.y_train, y_normed, validate_shape=False)

    # Observation noise
    alpha = tf.nn.softplus(self.noise) + 1e-6

    # Covariance
    with tf.control_dependencies([self.input_op, self.input_w_op, self.out_op]):
      self.self_cov = (self.cov(self.x_in, self.x_in) *
                       self.task_cov(self.weights, self.weights) +
                       tf.eye(tf.shape(self.x_in)[0], dtype=tf.float64) * alpha)

    self.chol = tf.cholesky(self.self_cov)
    self.kinv = tf.cholesky_solve(self.chol, tf.eye(tf.shape(self.x_in)[0],
                                                    dtype=tf.float64))

    self.input_inv = tf.Variable(
        tf.eye(self.hparams.batch_size, dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.input_cov_op = tf.assign(self.input_inv, self.kinv,
                                  validate_shape=False)

    # Log determinant by taking the singular values along the diagonal
    # of self.chol
    with tf.control_dependencies([self.input_cov_op]):
      logdet = 2.0 * tf.reduce_sum(tf.log(tf.diag_part(self.chol) + 1e-16))

    # Log Marginal likelihood
    self.marginal_ll = -tf.reduce_sum(-0.5 * tf.matmul(
        tf.transpose(y_normed), tf.matmul(self.kinv, y_normed)) - 0.5 * logdet -
                                      0.5 * self.n * np.log(2 * np.pi))

    zero = tf.cast(0., dtype=tf.float64)
    one = tf.cast(1., dtype=tf.float64)
    standard_normal = tfd.Normal(loc=zero, scale=one)

    # Loss is marginal likelihood and priors
    self.loss = tf.reduce_sum(
        self.marginal_ll -
        (standard_normal.log_prob(self.amplitude) +
         standard_normal.log_prob(tf.exp(self.noise)) +
         standard_normal.log_prob(self.amplitude_linear) +
         tfd.Normal(loc=zero, scale=one * 10.).log_prob(
             self.task_vectors))
    )

    # Optimizer for hyperparameters
    optimizer = tf.train.AdamOptimizer(learning_rate=self.hparams.lr)
    vars_to_optimize = [
        self.amplitude, self.length_scales, self.length_scales_lin,
        self.amplitude_linear, self.noise, self.input_mean
    ]

    if self.learn_embeddings:
      vars_to_optimize.append(self.task_vectors)
    grads = optimizer.compute_gradients(self.loss, vars_to_optimize)
    self.train_op = optimizer.apply_gradients(grads,
                                              global_step=self.global_step)

    # Predictions for test data
    self.y_mean, self.y_pred = self.posterior_mean_and_sample(self.x)

    # create tensorboard metrics
    self.create_summaries()
    self.summary_writer = tf.summary.FileWriter("{}/graph_{}".format(
        FLAGS.logdir, self.name), self.sess.graph)
    self.check = tf.add_check_numerics_ops()
 def _inverse(self, y):
     return tf.cholesky(y)
Example #38
    def FactorAnalysisMethod(self):
        ''' 
        Build Graph and execute in here
        so don't have to pass variables one by one
        Bad Coding Style but higher programmer productivity
        '''
        trainData = tf.placeholder(tf.float32, shape=[None, self.D], name="trainingData")
        batchSize = tf.shape(trainData)[0] 
        # Build Graph 
        print "trainShape", self.trainData.shape
        print "validShape", self.validData.shape
        print "testShape", self.testData.shape
        factorMean = tf.Variable(tf.random_normal([1, self.D]))
        # Cholesky doesn't accept negative weights
        #factorWeightsConstraint = tf.Variable(tf.random_normal([self.D, self.K]))
        factorWeights = tf.Variable(tf.random_normal([self.D, self.K]))
        factorStdDeviationConstraint = tf.Variable(tf.random_normal([self.D]))

        #factorWeights = tf.exp(factorWeightsConstraint)
        factorTraceCoVariance = tf.matrix_diag(tf.exp(factorStdDeviationConstraint))

        factorCovariance = tf.add(factorTraceCoVariance, tf.matmul(factorWeights, tf.transpose(factorWeights)))
        #factorTraceCoVariance = tf.exp(factorStdDeviationConstraint)
        # factorCovariance = tf.add(tf.diag(factorTraceCoVariance), tf.matmul(factorWeights, tf.transpose(factorWeights)))
        factorCovarianceInv = tf.matrix_inverse(factorCovariance)
        logDeterminantCovariance = 2.0 * tf.reduce_sum(tf.log(tf.diag_part(tf.cholesky(factorCovariance))))
        # Train Loss
        # xDeductUTranspose = tf.transpose(xDeductU, (0, 2, 1)) # B  * D * 1
        # Could have used trace here, doesn't make a difference to your calculation
        xDeductU = tf.subtract(trainData, factorMean) # B * D
        total = tf.trace(tf.matmul(tf.matmul(xDeductU, factorCovarianceInv), tf.transpose(xDeductU)))
            
        '''
        # METHOD 1
        # TODO: FIXME THIS IS WRONG! MULTIPLYING BATCH SIZE DOESNT FIX THE PROBLEM
        logProbability = tf.multiply(tf.cast(batchSize, tf.float32), (-self.D * tf.log(2.0 * np.pi) - logDeterminantCovariance))/2.0
        logProbability = logProbability + (total/2.0)
        loss = tf.negative(logProbability)
        '''
        
        #total = tf.reduce_sum(tf.multiply(tf.multiply(xDeductU, factorCovarianceInv), xDeductUTranspose))

        # Calculate log probability for entire batch, [B]
        # METHOD 2
        #factorCovarianceInv = tf.add(tf.expand_dims(tf.matrix_inverse(factorCovariance), 0), tf.zeros((batchSize, 1, 1)))
        xExpand= tf.expand_dims(xDeductU, 2) # B * D * 1
        total = tf.reduce_sum(tf.multiply(tf.reduce_sum(tf.multiply(xExpand, factorCovarianceInv)), xDeductU), [1])
        logProbability = (-self.D * tf.log(2.0 * np.pi) - total - logDeterminantCovariance)/2.0
        totalLogProbability = tf.reduce_sum(logProbability) # sum over the entire batch
        loss = tf.negative(totalLogProbability)
        # '''

        train = self.optimizer.minimize(loss)
        # Session
        init = tf.global_variables_initializer()
        sess = tf.InteractiveSession()
        sess.run(init)
        currEpoch = 0
        minAssignTrain = 0
        minAssignValid = 0
        centers = 0
        xAxis = []
        yTrainErr = []
        yValidErr = []
        yTestErr = []
        numUpdate = 0
        step = 0
        currTrainDataShuffle = self.trainData
        feedDictV = {trainData: self.validData}
        feedDictT = {trainData: self.testData}
        while currEpoch < self.numEpoch:
            #np.random.shuffle(self.trainData) # Shuffle Batches
            step = 0
            while step*self.miniBatchSize < self.trainData.shape[0]:
                feedDicts = {trainData: self.trainData[step*self.miniBatchSize:(step+1)*self.miniBatchSize]}
                _, errTrain = sess.run([train, loss], feed_dict = feedDicts)
                # Calculate loss without training for validation
                errValid = sess.run([loss], feed_dict = feedDictV)
                errTest = sess.run([loss], feed_dict = feedDictT)
                '''
                kara, hahah, heheh, huhuh = sess.run([loss, haha, hehe, huhu], feed_dict = feedDicts)
                logStdOut("NPPI: " + str(hahah))
                logStdOut("logDetCov: " + str(heheh))
                logStdOut("totalL: " + str(huhuh))
                '''
                xAxis.append(numUpdate)
                yTrainErr.append(errTrain)
                yValidErr.append(errValid)
                yTestErr.append(errTest)
                step += 1
                numUpdate += 1
            currEpoch += 1
            # if currEpoch%10 == 0:
            logStdOut("e: " + str(currEpoch))
        # Calculate everything again without training to ensure randomization is right
        feedDictsFinal = {trainData: self.trainData}
        errTrain, paramFactorMean, paramFactorCovariance, paramFactorWeights = sess.run([loss, factorMean, factorCovariance, factorWeights], feed_dict = feedDictsFinal)
        # Count how many assigned to each class
        currTrainDataShuffle = self.trainData
        self.printPlotResults(xAxis, yTrainErr, yValidErr, yTestErr, numUpdate, currTrainDataShuffle, paramFactorMean, paramFactorCovariance, paramFactorWeights)
Example #39
File: vb_norm_net.py Project: hcllaw/VBAgg
def build_net(in_dim,
              n_hidden,
              data_type,
              link='exp',
              total_size=None,
              var_init=0.01,
              bw_indiv=1.0,
              indiv_y_bol=False,
              kernel='ard',
              initialse='identity',
              seed=23,
              dtype=tf.float32,
              landmarks=None,
              avg_label=1.0,
              **others):
    #print('avg_label', avg_label)
    net = Network(in_dim,
                  data_type,
                  n_hidden=n_hidden,
                  link=link,
                  kernel=kernel,
                  var_init=var_init,
                  indiv_bol=indiv_y_bol,
                  dtype=dtype,
                  seed=seed)
    inputs = net.inputs
    params = net.params
    land_size = n_hidden

    cst = partial(tf.cast, dtype=dtype)
    # Model parameters
    initializer = tf.initializers.random_normal(
        seed=seed, dtype=dtype)  # normal initialiser
    z_initializer = tf.zeros_initializer(dtype=dtype)
    o_initializer = tf.ones_initializer(dtype=dtype)
    #initializer = tf.keras.initializers.he_normal(seed=seed)
    print('bw_indiv', bw_indiv)
    if initialse == 'identity':
        triangle_vec = tf.constant(triangular_vec(None, n=land_size),
                                   dtype=dtype)
    elif initialse == 'kernel':
        if kernel == 'additive':
            init_kernel = net.kernel(landmarks,
                                     landmarks,
                                     stddev_ard=bw_indiv[:-2],
                                     scale_ard=0.5,
                                     stddev_mat=bw_indiv[-2:],
                                     scale_mat=0.5,
                                     tensorf=False)
        elif kernel in ['rbf', 'ard']:
            init_kernel = net.kernel(landmarks,
                                     landmarks,
                                     stddev=bw_indiv,
                                     scale=1.0,
                                     tensorf=False)
        L = np.linalg.cholesky(init_kernel)
        triangle_vec = tf.constant(triangular_vec(L, n=land_size), dtype=dtype)
    # Intialise with L = I for safe inversion at start.
    params['L'] = tf.Variable(triangle_vec, name='L', dtype=dtype)
    params['mean'] = tf.Variable(avg_label * o_initializer([land_size, 1]),
                                 name='mean',
                                 dtype=dtype)
    #tf.Variable(tf.tile(tf.constant([7.0], dtype=dtype), land_size))
    #tf.Variable(z_initializer([land_size, 1]), name = 'mean', dtype=dtype)
    params['prior_mean'] = tf.Variable(z_initializer([1]),
                                       name='prior_mean',
                                       dtype=dtype)
    #tf.Variable(tf.constant([7.0], dtype=dtype), name = 'prior_mean', dtype=dtype)
    #tf.Variable(initializer([1]), name = 'prior_mean', dtype=dtype)

    if kernel in ['ard', 'additive']:
        params['log_bw_sq'] = tf.Variable(tf.log(
            tf.square(tf.constant(bw_indiv, dtype=dtype))),
                                          name='log_bw_sq')
        #params['log_bw_sq'] = tf.log(tf.square(tf.constant(bw_indiv, dtype=dtype)), name = 'log_bw_sq')
    elif kernel == 'rbf':
        print('Vary Bandwidth RBF')
        params['log_bw_sq'] = tf.Variable(tf.log(
            tf.square(tf.constant(bw_indiv, dtype=dtype))),
                                          name='log_bw_sq')
        #params['log_bw_sq'] = tf.log(tf.square(tf.constant(bw_indiv, dtype=dtype)))

    n_bags = cst(tf.shape(inputs['sizes'])[0])
    n_indiv = cst(tf.shape(inputs['X'])[0])

    sigma_sq = tf.exp(params['log_sig_sq'])
    scale = tf.exp(params['log_scale'])
    stddev = tf.sqrt(tf.exp(params['log_bw_sq']))
    #stddev = tf.Print(stddev, [stddev], message='bw', summarize=18)
    landmarks = inputs['landmarks']
    inputs_int = tf.concat([
        tf.constant([0], tf.int32),
        tf.cumsum(tf.cast(inputs['sizes'], tf.int32))
    ], 0)
    #inputs_int = tf.Print(inputs_int, [inputs_int])

    Sigma_term0 = tf.map_fn(fn=lambda k: tf.reduce_sum(scale * net.kernel(
        inputs['X'][inputs_int[k]:inputs_int[k + 1], :],
        inputs['X'][inputs_int[k]:inputs_int[k + 1], :],
        stddev=stddev,
        scale=1.0)),
                            elems=tf.range(tf.cast(n_bags, dtype=tf.int32)),
                            dtype=dtype)

    if kernel in ['ard', 'rbf']:
        k_ww = scale * net.kernel(
            landmarks, landmarks, stddev=stddev, scale=1.0)
        k_wz = scale * net.kernel(
            landmarks, inputs['X'], stddev=stddev, scale=1.0)  #K_wz
        k_zz = scale * net.kernel(
            inputs['X'], inputs['X'], stddev=stddev, scale=1.0)  #Change k_zz
        #k_ww = tf.Print(k_ww, [k_ww], message='k_ww', summarize=100)
        #k_wz = tf.Print(k_wz, [k_wz], message='k_wz', summarize=100)
        #k_zz = tf.Print(k_zz, [k_zz], message='k_zz', summarize=100)

        #k_wz = tf.Print(k_wz, [k_wz])
        term_0_diag = scale * tf.ones([tf.cast(n_indiv, dtype=tf.int32)],
                                      dtype=dtype)  #k_zz diagonal
    elif kernel == 'additive':
        scale_mat = tf.exp(params['log_scale_m'])
        k_ww = net.kernel(landmarks,
                          landmarks,
                          stddev_ard=stddev[:-2],
                          scale_ard=scale,
                          stddev_mat=stddev[-2:],
                          scale_mat=scale_mat)
        k_wz = net.kernel(landmarks,
                          inputs['X'],
                          stddev_ard=stddev[:-2],
                          scale_ard=scale,
                          stddev_mat=stddev[-2:],
                          scale_mat=scale_mat)
        #term_0_diag = (scale + scale_mat) * tf.ones([tf.cast(n_indiv, dtype=tf.int32)], dtype=dtype) #k_zz diagonal
    # SLOW: compute the full kernel matrix, pool twice, then take the diagonal.
    #Sigma_term0 = tf.diag_part(net.bag_pool(tf.transpose(net.bag_pool(k_zz))))
    #Sigma_term0 = tf.Print(Sigma_term0, [Sigma_term0, net.bag_pool(k_zz), net.bag_pool(tf.transpose(net.bag_pool(k_zz)))], message='Sigma0', summarize=1000)
    #Sigma_term0 = tf.Print(Sigma_term0, [Sigma_term0, batch_items], summarize=100)
    chol_k = tf.cholesky(k_ww)
    k_ww_inv = tf.matrix_inverse(k_ww)  # K_ww^-1
    triangular = fill_triangular(params['L'])  #\Sigma_u=LL^T

    Sigma_u = tf.matmul(triangular,
                        tf.transpose(triangular))  # Sigma_u = L L^T

    pool_kzw = net.bag_pool(tf.transpose(k_wz))
    #pool_kzw = tf.Print(pool_kzw, [tf.transpose(k_wz), pool_kzw], message='pool_kzw', summarize=100)
    pool_k_zw_k_ww_inv = tf.matmul(pool_kzw, k_ww_inv)
    kw_zw_k_ww_inv = tf.matmul(tf.transpose(k_wz), k_ww_inv)
    #pool_k_zw_k_ww_inv = tf.Print(pool_k_zw_k_ww_inv, [tf.matmul(tf.matmul(tf.transpose(k_wz), k_ww_inv), k_wz)], summarize=100, message='sum')
    #Sigma_term1_check = tf.diag_part(tf.matmul(pool_k_zw_k_ww_inv, tf.transpose(pool_kzw)))
    # Check this: transpose latter and elementwise multiply, sum across axis=1
    Sigma_term1 = tf.reduce_sum(tf.multiply(pool_k_zw_k_ww_inv, pool_kzw),
                                axis=1)
    #Sigma_term1 = tf.Print(Sigma_term1, [Sigma_term1, Sigma_term1_check], message='Sigma_term1')

    pool_k_zw_k_ww_inv_Sig_u = tf.matmul(pool_k_zw_k_ww_inv, Sigma_u)
    #pool_k_zw_k_ww_inv = tf.Print(pool_k_zw_k_ww_inv, [tf.matmul(tf.matmul(kw_zw_k_ww_inv, Sigma_u), tf.transpose(kw_zw_k_ww_inv))], summarize=100, message='sum_2')

    #Sigma_term2_check = tf.diag_part(tf.matmul(pool_k_zw_k_ww_inv_Sig_u, tf.transpose(pool_k_zw_k_ww_inv)))
    # Check this: transpose latter and elementwise multiply, sum across axis=1
    Sigma_term2 = tf.reduce_sum(tf.multiply(pool_k_zw_k_ww_inv_Sig_u,
                                            pool_k_zw_k_ww_inv),
                                axis=1)
    #Sigma_term2 = tf.Print(Sigma_term2, [Sigma_term2, Sigma_term2_check], message='Sigma_term2')

    Sigma_sum_term = Sigma_term0 - Sigma_term1 + Sigma_term2
    #Sigma_sum_term = tf.Print(Sigma_sum_term, [Sigma_term0, Sigma_term1, Sigma_term2])
    k_inv_k_wz = tf.matmul(k_ww_inv, k_wz)  # K_ww^-1 K_wz
    mean_diff = params['mean'] - params['prior_mean']
    #mean_diff = tf.Print(mean_diff, [tf.shape(mean_diff)], message='mean_diff')
    net.mu = mu = params['prior_mean'] + tf.squeeze(
        tf.matmul(tf.transpose(k_inv_k_wz), mean_diff))
    # mu_prior + K_zw K_ww^-1 (mu_u - mu_prior)
    mu_pool = tf.squeeze(net.bag_pool(tf.expand_dims(mu, 1)))  # 1^T mu [bags]
    #mu_pool = tf.Print(mu_pool, [mu_pool, mu], message='mu_pool')

    term_1_0 = tf.square(inputs['y'])  #sum_j y_j^2
    term_1_1 = 2.0 * tf.multiply(inputs['y'],
                                 mu_pool)  # 2 * sum_j(y_j *1^T mu)
    term_1_2 = Sigma_sum_term  # 1^T S 1
    term_1_3 = tf.square(
        mu_pool)  # \sum_j 1^T mu_j mu_j^t 1 = \sum_j (mu_j^t 1)^2

    # Term 1
    #sigma_sq = tf.Print(sigma_sq, [sigma_sq], 'sigma^2')
    bag_sigma_sq = sigma_sq * inputs['sizes']
    #bag_sigma_sq = tf.Print(bag_sigma_sq, [bag_sigma_sq, term_1_0, term_1_1, term_1_2, term_1_3], message='bag_sigma_sq')
    term_1_rescale = tf.divide(term_1_0 - term_1_1 + term_1_2 + term_1_3,
                               bag_sigma_sq)
    term_1 = tf.reduce_sum(term_1_rescale)

    # Term 2 \sum_j log(2 pi sigma^2_j)
    term_2 = tf.reduce_sum(tf.log(2.0 * pi * bag_sigma_sq))

    # Term 3
    tfd = tf.contrib.distributions
    mvn_q = tfd.MultivariateNormalTriL(loc=tf.squeeze(params['mean']),
                                       scale_tril=triangular)
    mvn_u = tfd.MultivariateNormalTriL(loc=tf.tile(params['prior_mean'],
                                                   [land_size]),
                                       scale_tril=chol_k)
    term_3 = tf.distributions.kl_divergence(mvn_q, mvn_u)
    #term_3 = tf.Print(term_3, [0.5* term_1/n_bags, 0.5* term_2/n_bags, term_3/total_size], message='all_terms')
    term_1_diag = tf.reduce_sum(tf.multiply(k_wz, k_inv_k_wz),
                                axis=0)  #diag K_zw K_ww^-1 k_wz
    #term_1_diag_check = tf.diag_part(tf.matmul(tf.transpose(k_wz), k_inv_k_wz))
    k_zw_k_inv_S = tf.matmul(tf.transpose(k_inv_k_wz),
                             Sigma_u)  # k_zw K_ww^-1 Sigma_u
    #term_2_diag_check = tf.diag_part(tf.matmul(k_zw_k_inv_S, k_inv_k_wz))
    term_2_diag = tf.reduce_sum(tf.multiply(tf.transpose(k_zw_k_inv_S),
                                            k_inv_k_wz),
                                axis=0)
    # diagonal as [n_indiv]
    #Sigma_diag_check = Sigma_diag = term_0_diag - term_1_diag + term_2_diag
    net.Sigma_diag = Sigma_diag = term_0_diag - term_1_diag + term_2_diag
    #term_1 = tf.Print(term_1, [term_0_diag,term_1_diag, term_2_diag, tf.sqrt(Sigma_diag), tf.sqrt(Sigma_diag_check)], summarize=3, message='Sigma_diag')
    net.loss = loss = -1.0 / n_bags * (-0.5 * term_1 -
                                       0.5 * term_2) + term_3 / total_size

    #if MAP:
    #net.indiv = indiv = tf.exp(mu - Sigma_diag)
    #else:

    net.indiv = indiv = mu  #tf.squeeze(mu + 0.5 * Sigma_diag))
    #indiv = tf.Print(indiv, [indiv], message='mu', summarize=5)

    #net.indiv = indiv = tf.exp(mu - Sigma_diag)
    net.indiv_se = net.square_err(inputs['indiv_true_y'], indiv)
    net.indiv_nll = net.nll_term(inputs['indiv_y'], indiv)

    #indiv = tf.Print(indiv, [indiv], summarize =200, message='indiv')
    #indiv_mean = tf.exp(mu + 0.5 * Sigma_diag)
    net.indiv_y = indiv_y_pop = tf.multiply(inputs['indiv_pop'], indiv)
    indiv_y_pop = tf.expand_dims(indiv_y_pop, 1)
    net.bag_y = bag_y = tf.squeeze(net.bag_pool(indiv_y_pop))
    bag_y = tf.Print(bag_y, [bag_y, inputs['y']], message='bag')
    net.bag_se = net.square_err(inputs['y'], bag_y, bags=True)
    net.bag_nll = net.nll_term(inputs['y'], bag_y, bags=True)

    #indiv_y_mean = tf.multiply(inputs['indiv_pop'], tf.exp(mu + 0.5 * Sigma_diag))
    #indiv_y_var = tf.multiply(tf.exp(Sigma_diag) - 1.0, tf.exp( 2.0* mu + Sigma_diag) )
    #indiv_y = tf.Print(indiv_y, [indiv_y_mean, inputs['indiv_y'], indiv_y_var], summarize=2)
    #net.bag_se = tf.reduce_sum(tf.square(bag_y - inputs['y']))
    #if indiv_y_bol:
    #    net.indiv_se = tf.reduce_sum(tf.square(indiv_y - inputs['indiv_y']))
    # Can add net.print_out
    return net
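
# --- Illustrative sketch (not part of the example above). The Sigma_term1 /
# Sigma_term2 lines replace an explicit diag(A @ B^T) with a row-wise
# multiply-and-sum, e.g.
#   Sigma_term1 = tf.reduce_sum(tf.multiply(pool_k_zw_k_ww_inv, pool_kzw), axis=1)
# A minimal NumPy check of the identity diag(A B^T)_i = sum_j A_ij B_ij, with
# small random matrices standing in for the pooled kernel blocks:
import numpy as np

rng = np.random.RandomState(0)
A = rng.randn(5, 7)   # plays the role of pool_kzw @ K_ww^{-1}
B = rng.randn(5, 7)   # plays the role of pool_kzw
assert np.allclose(np.diag(A @ B.T), np.sum(A * B, axis=1))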
Example #40
0
def uncertain_conditional(Xnew_mu, Xnew_var, feat, kern, q_mu, q_sqrt, *,
                          full_cov_output=False, full_cov=False, white=False):
    """
    Calculates the conditional for uncertain inputs Xnew, p(Xnew) = N(Xnew_mu, Xnew_var).
    See ``conditional`` documentation for further reference.

    :param Xnew_mu: mean of the inputs, size N x Din
    :param Xnew_var: covariance matrix of the inputs, size N x Din x Din
    :param feat: gpflow.InducingFeature object, only InducingPoints is supported
    :param kern: gpflow kernel or ekernel object.
    :param q_mu: mean inducing points, size M x Dout
    :param q_sqrt: cholesky of the covariance matrix of the inducing points, size M x M x Dout
    :param full_cov_output: boolean whether to compute the covariance between output dimensions.
                            Influences the shape of return value ``fvar``. Default is False
    :param white: boolean whether to use whitened representation. Default is False.

    :return fmean, fvar: mean and covariance of the conditional, size ``fmean`` is N x Dout,
            size ``fvar`` depends on ``full_cov_output``: if True ``f_var`` is N x Dout x Dout,
            if False then ``f_var`` is N x Dout
    """

    # TODO: Tensorflow 1.3 doesn't support broadcasting in``tf.matmul`` and
    # ``tf.matrix_triangular_solve``. This is reported in issue 216.
    # As a temporary workaround, we are using ``tf.einsum`` for the matrix
    # multiplications and tiling in the triangular solves.
    # The code that should be used once the bug is resolved is added in comments.

    if not isinstance(feat, InducingPoints):
        raise NotImplementedError

    if full_cov:
        # TODO: ``full_cov`` True would return a ``fvar`` of shape N x N x D x D,
        # encoding the covariance between input datapoints as well.
        # This is not implemented as this feature is only used for plotting purposes.
        raise NotImplementedError

    num_data = tf.shape(Xnew_mu)[0]  # number of new inputs (N)
    num_func = tf.shape(q_mu)[1]  # output dimension (D)

    q_sqrt_r = tf.matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1, 0)  # D x M x M

    eKuf = tf.transpose(feat.eKfu(kern, Xnew_mu, Xnew_var))  # M x N
    Kuu = feat.Kuu(kern, jitter=settings.numerics.jitter_level)  # M x M
    Luu = tf.cholesky(Kuu)  # M x M

    if not white:
        q_mu = tf.matrix_triangular_solve(Luu, q_mu, lower=True)
        Luu_tiled = tf.tile(Luu[None, :, :], [num_func, 1, 1])  # remove line once issue 216 is fixed
        q_sqrt_r = tf.matrix_triangular_solve(Luu_tiled, q_sqrt_r, lower=True)

    Li_eKuf = tf.matrix_triangular_solve(Luu, eKuf, lower=True)  # M x N
    fmean = tf.matmul(Li_eKuf, q_mu, transpose_a=True)

    eKff = kern.eKdiag(Xnew_mu, Xnew_var)  # N
    eKuffu = feat.eKufKfu(kern, Xnew_mu, Xnew_var)  # N x M x M
    Luu_tiled = tf.tile(Luu[None, :, :], [num_data, 1, 1])  # remove this line, once issue 216 is fixed
    Li_eKuffu_Lit = tf.matrix_triangular_solve(Luu_tiled, tf.matrix_transpose(eKuffu), lower=True)
    Li_eKuffu_Lit = tf.matrix_triangular_solve(Luu_tiled, tf.matrix_transpose(Li_eKuffu_Lit), lower=True)  # N x M x M

    cov = tf.matmul(q_sqrt_r, q_sqrt_r, transpose_b=True)  # D x M x M

    if full_cov_output:
        fvar = (
            tf.matrix_diag(tf.tile((eKff - tf.trace(Li_eKuffu_Lit))[:, None], [1, num_func])) +
            tf.matrix_diag(tf.einsum("nij,dji->nd", Li_eKuffu_Lit, cov)) +
            # tf.matrix_diag(tf.trace(tf.matmul(Li_eKuffu_Lit, cov))) +
            tf.einsum("ig,nij,jh->ngh", q_mu, Li_eKuffu_Lit, q_mu) -
            # tf.matmul(q_mu, tf.matmul(Li_eKuffu_Lit, q_mu), transpose_a=True) -
            tf.matmul(fmean[:, :, None], fmean[:, :, None], transpose_b=True)
        )
    else:
        fvar = (
            (eKff - tf.trace(Li_eKuffu_Lit))[:, None] +
            tf.einsum("nij,dji->nd", Li_eKuffu_Lit, cov) +
            tf.einsum("ig,nij,jg->ng", q_mu, Li_eKuffu_Lit, q_mu) -
            fmean ** 2
        )

    return fmean, fvar
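
# --- Illustrative sketch (not part of the example above). The einsum
#   tf.einsum("nij,dji->nd", Li_eKuffu_Lit, cov)
# computes, for every n and d, the trace of Li_eKuffu_Lit[n] @ cov[d]
# (cf. the commented-out tf.trace line). A quick NumPy check of that identity:
import numpy as np

rng = np.random.RandomState(1)
N, D, M = 4, 3, 5
T = rng.randn(N, M, M)   # stands in for Li_eKuffu_Lit (N x M x M)
C = rng.randn(D, M, M)   # stands in for cov = q_sqrt_r @ q_sqrt_r^T (D x M x M)
via_einsum = np.einsum("nij,dji->nd", T, C)
via_trace = np.array([[np.trace(T[n] @ C[d]) for d in range(D)] for n in range(N)])
assert np.allclose(via_einsum, via_trace)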
Example #41
0
    def _build_graph(self, raw_weights, raw_means, raw_covars, raw_link_covars,
                     raw_priorlink_covars, raw_inducing_inputs, train_inputs,
                     train_outputs, num_train, test_inputs, test_outputs):
        # normalise weights
        weights = tf.exp(raw_weights) / tf.reduce_sum(tf.exp(raw_weights))

        if self.diag_post:
            covars = tf.exp(raw_covars)
            link_covars = None

        else:
            covars_list = [None] * self.num_components
            for i in range(self.num_components):
                mat = util.vec_to_tri(
                    raw_covars[i, :, :])  #creates mats by row ie r so RxMxM
                diag_mat = tf.matrix_diag(tf.matrix_diag_part(mat))
                exp_diag_mat = tf.matrix_diag(tf.exp(tf.matrix_diag_part(mat)))

                if self.sparse_post:
                    matcol = tf.expand_dims(
                        (mat - diag_mat)[:, :, 1],
                        2)  # extract first col with first element==0
                    padding = [[0, 0], [0, 0], [0, self.num_inducing - 1]]
                    covars_list[i] = tf.pad(matcol, padding) + exp_diag_mat
                else:
                    covars_list[i] = mat - diag_mat + exp_diag_mat

            covars = tf.stack(covars_list, 0)

            # create nested list of posterior link parameters
            #TODO: standardize dummies for prior and post link components (floats vs tensors)
            #TODO: remove free value for block size==1, replace with fixed==1.0 and check dependents
            link_covars = [None] * self.num_components
            for i in range(self.num_components):
                mat = util.vec_to_tri(
                    raw_link_covars[i, :, :]
                )  #creates mats by row ie r so R x max(Qr) x max(Qr)
                diag_mat = tf.matrix_diag(tf.matrix_diag_part(mat))
                exp_diag_mat = tf.matrix_diag(tf.exp(tf.matrix_diag_part(mat)))

                if self.sparse_post:
                    matcol = tf.expand_dims(
                        (mat - diag_mat)[:, :, 1],
                        2)  # extract first col with first element==0
                    padding = [[0, 0], [0, 0], [0, tf.shape(mat)[2] - 1]]
                    mats_in = tf.pad(
                        matcol,
                        padding) + exp_diag_mat  # R x max(Qr) x max(Qr)
                else:
                    mats_in = mat - diag_mat + exp_diag_mat  # R x max(Qr) x max(Qr)

                # trim ragged block sizes and retain as list
                mats_in = tf.unstack(
                    mats_in,
                    axis=0)  # split into R mats shaped max(Qr) x max(Qr)
                for r in range(self.num_block):
                    if len(self.block_struct[r]
                           ) == 1:  # keep dims where trimmed to scalar
                        mats_in[r] = tf.expand_dims(tf.expand_dims(
                            mats_in[r][0, 0], axis=0),
                                                    axis=1)
                    else:
                        mats_in[r] = mats_in[r][:len(self.block_struct[r]), :
                                                len(self.block_struct[r])]

                link_covars[i] = mats_in

        # Both inducing inputs and the posterior means can vary freely so don't change them.
        means = raw_means
        inducing_inputs = raw_inducing_inputs

        # Build the matrices of covariances between inducing inputs.
        kernel_mat = [
            self.kernels[r].kernel(inducing_inputs[r, :, :])
            for r in range(self.num_block)
        ]
        kernel_chol = [tf.cholesky(k) for k in kernel_mat]

        # generate freely parameterized K(j,j') for each block of latent functions
        # where dim (block) = 1 (i.e. independent latent function), mat/chol set == 1
        mat = util.vec_to_tri(
            raw_priorlink_covars
        )  #creates lower diag mats by row ie r so R x max(Qr) x max(Qr)
        diag_mat = tf.matrix_diag(tf.matrix_diag_part(mat))
        exp_diag_mat = tf.matrix_diag(tf.exp(tf.matrix_diag_part(mat)))

        if self.sparse_prior:
            matcol = tf.expand_dims(
                (mat - diag_mat)[:, :, 1],
                2)  # extract first col with first element==0
            padding = [[0, 0], [0, 0], [0, tf.shape(mat)[2] - 1]]
            mats_in = tf.pad(matcol,
                             padding) + exp_diag_mat  # R x max(Qr) x max(Qr)
        else:
            mats_in = mat - diag_mat + exp_diag_mat  # R x max(Qr) x max(Qr)

        kernlink_chol = util.init_list(1.0, [self.num_block])
        for r in range(self.num_block):
            if len(self.block_struct[r]) == 1:  # leave as dummy value == 1.0
                continue
            else:
                kernlink_chol[r] = mats_in[
                    r, :len(self.block_struct[r]), :len(self.block_struct[r])]

        # Now build the objective function.
        entropy = self._build_entropy(weights, means, covars, link_covars)
        cross_ent = self._build_cross_ent(weights, means, covars, link_covars,
                                          kernel_chol, kernlink_chol)
        ell = self._build_ell(weights, means, covars, link_covars,
                              inducing_inputs, kernel_chol, kernlink_chol,
                              train_inputs, train_outputs)
        batch_size = tf.to_float(tf.shape(train_inputs)[0])
        nelbo = -((batch_size / num_train) * (entropy + cross_ent) + ell)

        # Finally, build the prediction function.
        predictions = self._build_predict(weights, means, covars, link_covars,
                                          inducing_inputs, kernel_chol,
                                          kernlink_chol, test_inputs)
        # Build the nlpd function.
        general_nlpd = self._build_nlpd(weights, means, covars, link_covars,
                                        inducing_inputs, kernel_chol,
                                        kernlink_chol, test_inputs,
                                        test_outputs)
        return nelbo, predictions, general_nlpd
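
# --- Illustrative sketch (not part of the example above). The covariance
# factors above are built by packing a free vector into a lower-triangular
# matrix and replacing its diagonal with exp(diagonal) so the factor is always
# valid (mat - diag_mat + exp_diag_mat). A plain NumPy version; vec_to_lower_tri
# is a hypothetical stand-in for util.vec_to_tri and its packing order may differ.
import numpy as np

def vec_to_lower_tri(vec, n):
    mat = np.zeros((n, n))
    mat[np.tril_indices(n)] = vec   # fill the lower triangle row by row
    return mat

n = 3
raw = np.random.RandomState(2).randn(n * (n + 1) // 2)
mat = vec_to_lower_tri(raw, n)
chol = mat - np.diag(np.diag(mat)) + np.diag(np.exp(np.diag(mat)))
cov = chol @ chol.T                         # a valid (positive-definite) covariance
assert np.all(np.linalg.eigvalsh(cov) > 0)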
Example #42
0
    def testWrongDimensions(self):
        tensor3 = tf.constant([1., 2.])
        with self.assertRaises(ValueError):
            tf.cholesky(tensor3)
Example #43
0
def variational_sgpr(X,
                     Z,
                     ls=1.,
                     kern_func=rbf,
                     ridge_factor=1e-3,
                     mfvi_mixture=False,
                     n_mixture=1):
    """Defines the mean-field variational family for GPR.

    Args:
        X: (np.ndarray of float32) input training features, with dimension (Nx, D).
        Z: (np.ndarray of float32) inducing points, with dimension (Nz, D).
        ls: (float32) length scale parameter.
        kern_func: (function) kernel function.
        ridge_factor: (float32) small ridge factor to stabilize Cholesky decomposition
        mfvi_mixture: (bool) Whether to output a variational family with a
            mixture of MFVI.
        n_mixture: (int) Number of MFVI mixture component to add.

    Returns:
        q_f, q_sig: (ed.RandomVariable) variational family.
        q_f_mean, q_f_sdev: (tf.Variable) variational parameters for q_f
        mixture_par_list: (list of tf.Variable) variational parameters for
            MFVI mixture ('mixture_logits', 'mixture_logits_mfvi_mix',
            'mean_mfvi', 'sdev_mfvi') if mfvi_mixture=True, else [].
    """
    X = tf.convert_to_tensor(X)
    Z = tf.convert_to_tensor(Z)

    Nx, Nz = X.shape.as_list()[0], Z.shape.as_list()[0]

    # 1. Prepare constants
    # compute matrix constants
    Kxx = kern_func(X, ls=ls)
    Kxz = kern_func(X, Z, ls=ls)
    Kzz = kern_func(Z, ls=ls, ridge_factor=ridge_factor)

    # compute the inverse of Kzz using its Cholesky decomposition
    Kzz_chol_inv = tf.matrix_inverse(tf.cholesky(Kzz))
    Kzz_inv = tf.matmul(Kzz_chol_inv, Kzz_chol_inv, transpose_a=True)

    Kxz_Kzz_chol_inv = tf.matmul(Kxz, Kzz_chol_inv, transpose_b=True)
    Kxz_Kzz_inv = tf.matmul(Kxz, Kzz_inv)
    Sigma_pre = Kxx - tf.matmul(
        Kxz_Kzz_chol_inv, Kxz_Kzz_chol_inv, transpose_b=True)

    # 2. Define variational parameters
    # define mean and variance for sigma
    q_sig_mean = tf.get_variable(shape=[], name='q_sig_mean')
    q_sig_sdev = tf.exp(tf.get_variable(shape=[], name='q_sig_sdev'))

    # define free parameters (i.e. mean and full covariance of f_latent)
    m = tf.get_variable(shape=[Nz], name='qf_m')
    s = tf.get_variable(
        shape=[Nz * (Nz + 1) // 2],  # integer division: the shape must be an int
        # initializer=tf.zeros_initializer(),
        name='qf_s')
    L = fill_triangular(s, name='qf_chol')
    S = tf.matmul(L, L, transpose_b=True)

    # compute sparse gp variational parameter (i.e. mean and covariance of P(f_obs | f_latent))
    qf_mean = tf.tensordot(Kxz_Kzz_inv, m, [[1], [0]], name='qf_mean')
    qf_cov = (
        Sigma_pre +
        tf.matmul(Kxz_Kzz_inv, tf.matmul(S, Kxz_Kzz_inv, transpose_b=True)) +
        ridge_factor * tf.eye(Nx, dtype=tf.float32))

    # define variational family
    mixture_par_list = []
    if mfvi_mixture:
        gp_dist = tfd.MultivariateNormalFullCovariance(
            loc=qf_mean, covariance_matrix=qf_cov)
        q_f, mixture_par_list = inference_util.make_mfvi_sgp_mixture_family(
            n_mixture=n_mixture, N=Nx, gp_dist=gp_dist, name='q_f')
    else:
        q_f = ed.MultivariateNormalFullCovariance(loc=qf_mean,
                                                  covariance_matrix=qf_cov,
                                                  name='q_f')
    q_sig = ed.Normal(loc=q_sig_mean, scale=q_sig_sdev, name='q_sig')

    return q_f, q_sig, qf_mean, qf_cov, mixture_par_list
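
# --- Illustrative sketch (not part of the example above). The variational
# family built above follows the usual sparse-GP construction
#   qf_mean = Kxz Kzz^{-1} m
#   qf_cov  = Kxx - Kxz Kzz^{-1} Kzx + Kxz Kzz^{-1} S Kzz^{-1} Kzx
# A small NumPy version with an RBF kernel; every name below is local to this
# sketch and not taken from the example.
import numpy as np

def rbf_np(A, B, ls=1.0):
    d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return np.exp(-0.5 * d2 / ls ** 2)

rng = np.random.RandomState(3)
X, Z = rng.randn(6, 1), rng.randn(3, 1)
m = rng.randn(3)                              # variational mean of the inducing values
Lq = np.tril(rng.randn(3, 3))
S = Lq @ Lq.T + 1e-6 * np.eye(3)              # variational covariance of the inducing values

Kxx, Kxz = rbf_np(X, X), rbf_np(X, Z)
Kzz = rbf_np(Z, Z) + 1e-6 * np.eye(3)
A = Kxz @ np.linalg.inv(Kzz)
qf_mean = A @ m
qf_cov = Kxx - A @ Kxz.T + A @ S @ A.T
assert np.allclose(qf_cov, qf_cov.T)          # numerically symmetric, as a covariance should be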
Example #44
0
    def build_KL(self):
        """
        The covariance of q(u) has a kronecker structure, so
        appropriate reductions apply for the trace and logdet terms.
        """
        # Mahalanobis term, m^T K^{-1} m
        Kuu = [
            make_Kuu(kern, a, b, self.ms)
            for kern, a, b, in zip(self.kerns, self.a, self.b)
        ]
        Kim = kron_vec_apply(Kuu, self.q_mu, 'solve')
        KL = 0.5 * tf.reduce_sum(self.q_mu * Kim)

        # Constant term
        KL += -0.5 * tf.cast(tf.size(self.q_mu), float_type)

        # Log det term
        Ls = [
            tf.matrix_band_part(q_sqrt_d, -1, 0)
            for q_sqrt_d in self.q_sqrt_kron
        ]
        N_others = [float(np.prod(self.Ms)) / M for M in self.Ms]
        Q_logdets = [
            tf.reduce_sum(tf.log(tf.square(tf.diag_part(L)))) for L in Ls
        ]
        KL += -0.5 * reduce(
            tf.add, [N * logdet for N, logdet in zip(N_others, Q_logdets)])

        # trace term tr(K^{-1} Sigma_q)
        Ss = [tf.matmul(L, tf.transpose(L)) for L in Ls]
        traces = [K.trace_KiX(S) for K, S, in zip(Kuu, Ss)]
        KL += 0.5 * reduce(tf.multiply,
                           traces)  # kron-trace is the product of traces

        # log det term Kuu
        Kuu_logdets = [K.logdet() for K in Kuu]
        KL += 0.5 * reduce(
            tf.add, [N * logdet for N, logdet in zip(N_others, Kuu_logdets)])

        if self.use_two_krons:
            # extra logdet terms:
            Ls_2 = [
                tf.matrix_band_part(q_sqrt_d, -1, 0)
                for q_sqrt_d in self.q_sqrt_kron_2
            ]
            LiL = [
                tf.matrix_triangular_solve(L1, L2) for L1, L2 in zip(Ls, Ls_2)
            ]
            eigvals = [
                tf.self_adjoint_eig(tf.matmul(tf.transpose(mat), mat))[0]
                for mat in LiL
            ]  # discard eigenvectors
            eigvals_kronned = kron([tf.reshape(e, [1, -1]) for e in eigvals])
            KL += -0.5 * tf.reduce_sum(tf.log(1 + eigvals_kronned))

            # extra trace terms
            Ss = [tf.matmul(L, tf.transpose(L)) for L in Ls_2]
            traces = [K.trace_KiX(S) for K, S, in zip(Kuu, Ss)]
            KL += 0.5 * reduce(tf.multiply,
                               traces)  # kron-trace is the product of traces

        elif self.use_extra_ranks:
            # extra logdet terms
            KiW = kron_mat_apply(Kuu, self.q_sqrt_W, 'solve',
                                 self.use_extra_ranks)
            WTKiW = tf.matmul(tf.transpose(self.q_sqrt_W), KiW)
            L_extra = tf.cholesky(np.eye(self.use_extra_ranks) + WTKiW)
            KL += -0.5 * tf.reduce_sum(tf.log(tf.square(
                tf.diag_part(L_extra))))

            # extra trace terms
            KL += 0.5 * tf.reduce_sum(tf.diag_part(WTKiW))

        return KL
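
# --- Illustrative sketch (not part of the example above). The Kronecker
# reductions relied on here are
#   logdet(A kron B) = nB * logdet(A) + nA * logdet(B)
#   trace(A kron B)  = trace(A) * trace(B)
# A quick NumPy check with two small SPD matrices:
import numpy as np

rng = np.random.RandomState(4)
def random_spd(n):
    M = rng.randn(n, n)
    return M @ M.T + n * np.eye(n)

A, B = random_spd(2), random_spd(3)
K = np.kron(A, B)
logdet = lambda M: np.linalg.slogdet(M)[1]
assert np.isclose(logdet(K), B.shape[0] * logdet(A) + A.shape[0] * logdet(B))
assert np.isclose(np.trace(K), np.trace(A) * np.trace(B))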
Example #45
0
def uncertain_conditional(Xnew_mu,
                          Xnew_var,
                          feat,
                          kern,
                          q_mu,
                          q_sqrt,
                          *,
                          mean_function=None,
                          full_cov_output=False,
                          full_cov=False,
                          white=False):
    """
    Calculates the conditional for uncertain inputs Xnew, p(Xnew) = N(Xnew_mu, Xnew_var).
    See ``conditional`` documentation for further reference.

    :param Xnew_mu: mean of the inputs, size N x Din
    :param Xnew_var: covariance matrix of the inputs, size N x Din x Din
    :param feat: gpflow.InducingFeature object, only InducingPoints is supported
    :param kern: gpflow kernel or ekernel object.
    :param q_mu: mean inducing points, size M x Dout
    :param q_sqrt: cholesky of the covariance matrix of the inducing points, size Dout x M x M
    :param full_cov_output: boolean whether to compute the covariance between output dimensions.
                            Influences the shape of return value ``fvar``. Default is False
    :param white: boolean whether to use whitened representation. Default is False.

    :return fmean, fvar: mean and covariance of the conditional, size ``fmean`` is N x Dout,
            size ``fvar`` depends on ``full_cov_output``: if True ``f_var`` is N x Dout x Dout,
            if False then ``f_var`` is N x Dout
    """

    # TODO: Tensorflow 1.4 doesn't support broadcasting in``tf.matmul`` and
    # ``tf.matrix_triangular_solve``. This is reported in issue 216.
    # As a temporary workaround, we are using ``tf.einsum`` for the matrix
    # multiplications and tiling in the triangular solves.
    # The code that should be used once the bug is resolved is added in comments.

    if not isinstance(feat, InducingPoints):
        raise NotImplementedError

    if full_cov:
        # TODO: ``full_cov`` True would return a ``fvar`` of shape N x N x D x D,
        # encoding the covariance between input datapoints as well.
        # This is not implemented as this feature is only used for plotting purposes.
        raise NotImplementedError

    pXnew = Gaussian(Xnew_mu, Xnew_var)

    num_data = tf.shape(Xnew_mu)[0]  # number of new inputs (N)
    num_ind = tf.shape(q_mu)[0]  # number of inducing points (M)
    num_func = tf.shape(q_mu)[1]  # output dimension (D)

    q_sqrt_r = tf.matrix_band_part(q_sqrt, -1, 0)  # D x M x M

    eKuf = tf.transpose(expectation(pXnew, (kern, feat)))  # M x N (psi1)
    Kuu = feat.Kuu(kern, jitter=settings.numerics.jitter_level)  # M x M
    Luu = tf.cholesky(Kuu)  # M x M

    if not white:
        q_mu = tf.matrix_triangular_solve(Luu, q_mu, lower=True)
        Luu_tiled = tf.tile(
            Luu[None, :, :],
            [num_func, 1, 1])  # remove line once issue 216 is fixed
        q_sqrt_r = tf.matrix_triangular_solve(Luu_tiled, q_sqrt_r, lower=True)

    Li_eKuf = tf.matrix_triangular_solve(Luu, eKuf, lower=True)  # M x N
    fmean = tf.matmul(Li_eKuf, q_mu, transpose_a=True)

    eKff = expectation(pXnew, kern)  # N (psi0)
    eKuffu = expectation(pXnew, (kern, feat), (kern, feat))  # N x M x M (psi2)
    Luu_tiled = tf.tile(
        Luu[None, :, :],
        [num_data, 1, 1])  # remove this line, once issue 216 is fixed
    Li_eKuffu_Lit = tf.matrix_triangular_solve(Luu_tiled,
                                               tf.matrix_transpose(eKuffu),
                                               lower=True)
    Li_eKuffu_Lit = tf.matrix_triangular_solve(
        Luu_tiled, tf.matrix_transpose(Li_eKuffu_Lit), lower=True)  # N x M x M
    cov = tf.matmul(q_sqrt_r, q_sqrt_r, transpose_b=True)  # D x M x M

    if mean_function is None or isinstance(mean_function, mean_functions.Zero):
        e_related_to_mean = tf.zeros((num_data, num_func, num_func),
                                     dtype=settings.float_type)
    else:
        # Update mean: \mu(x) + m(x)
        fmean = fmean + expectation(pXnew, mean_function)

        # Calculate: m(x) m(x)^T + m(x) \mu(x)^T + \mu(x) m(x)^T,
        # where m(x) is the mean_function and \mu(x) is fmean
        e_mean_mean = expectation(pXnew, mean_function,
                                  mean_function)  # N x D x D
        Lit_q_mu = tf.matrix_triangular_solve(Luu, q_mu, adjoint=True)
        e_mean_Kuf = expectation(pXnew, mean_function,
                                 (kern, feat))  # N x D x M
        # einsum isn't able to infer the rank of e_mean_Kuf, hence we explicitly set the rank of the tensor:
        e_mean_Kuf = tf.reshape(e_mean_Kuf, [num_data, num_func, num_ind])
        e_fmean_mean = tf.einsum("nqm,mz->nqz", e_mean_Kuf,
                                 Lit_q_mu)  # N x D x D
        e_related_to_mean = e_fmean_mean + tf.matrix_transpose(
            e_fmean_mean) + e_mean_mean

    if full_cov_output:
        fvar = (
            tf.matrix_diag(
                tf.tile(
                    (eKff - tf.trace(Li_eKuffu_Lit))[:, None], [1, num_func]))
            + tf.matrix_diag(tf.einsum("nij,dji->nd", Li_eKuffu_Lit, cov)) +
            # tf.matrix_diag(tf.trace(tf.matmul(Li_eKuffu_Lit, cov))) +
            tf.einsum("ig,nij,jh->ngh", q_mu, Li_eKuffu_Lit, q_mu) -
            # tf.matmul(q_mu, tf.matmul(Li_eKuffu_Lit, q_mu), transpose_a=True) -
            fmean[:, :, None] * fmean[:, None, :] + e_related_to_mean)
    else:
        fvar = ((eKff - tf.trace(Li_eKuffu_Lit))[:, None] +
                tf.einsum("nij,dji->nd", Li_eKuffu_Lit, cov) +
                tf.einsum("ig,nij,jg->ng", q_mu, Li_eKuffu_Lit, q_mu) -
                fmean**2 + tf.matrix_diag_part(e_related_to_mean))

    return fmean, fvar
Example #46
0
def cholesky_covariance(x, sample_axis=0, keepdims=False, name=None):
    """Cholesky factor of the covariance matrix of vector-variate random samples.

  This function can be used to fit a multivariate normal to data.

  ```python
  tf.enable_eager_execution()
  import tensorflow_probability as tfp
  tfd = tfp.distributions

  # Assume data.shape = (1000, 2).  1000 samples of a random variable in R^2.
  observed_data = read_data_samples(...)

  # The mean is easy
  mu = tf.reduce_mean(observed_data, axis=0)

  # Get the scale matrix
  L = tfp.stats.cholesky_covariance(observed_data)

  # Make the best fit multivariate normal (under maximum likelihood condition).
  mvn = tfd.MultivariateNormalTriL(loc=mu, scale_tril=L)

  # Plot contours of the pdf.
  xs, ys = tf.meshgrid(
      tf.linspace(-5., 5., 50), tf.linspace(-5., 5., 50), indexing='ij')
  xy = tf.stack((tf.reshape(xs, [-1]), tf.reshape(ys, [-1])), axis=-1)
  pdf = tf.reshape(mvn.prob(xy), (50, 50))
  CS = plt.contour(xs, ys, pdf, 10)
  plt.clabel(CS, inline=1, fontsize=10)
  ```

  Why does this work?
  Given vector-variate random variables `X = (X1, ..., Xd)`, one may obtain the
  sample covariance matrix in `R^{d x d}` (see `tfp.stats.covariance`).

  The [Cholesky factor](https://en.wikipedia.org/wiki/Cholesky_decomposition)
  of this matrix is analogous to standard deviation for scalar random variables:
  Suppose `X` has covariance matrix `C`, with Cholesky factorization `C = L L^T`.
  Then multiplying a vector of iid random variables which have unit variance by
  `L` produces a vector with covariance `L L^T`, which is the same as `X`.

  ```python
  observed_data = read_data_samples(...)
  L = tfp.stats.cholesky_covariance(observed_data, sample_axis=0)

  # Make fake_data with the same covariance as observed_data.
  uncorrelated_normal = tf.random_normal(shape=(500, 10))
  fake_data = tf.linalg.matvec(L, uncorrelated_normal)
  ```

  Args:
    x:  Numeric `Tensor`.  The rightmost dimension of `x` indexes events. E.g.
      dimensions of a random vector.
    sample_axis: Scalar or vector `Tensor` designating axis holding samples.
      Default value: `0` (leftmost dimension). Cannot be the rightmost dimension
        (since this indexes events).
    keepdims:  Boolean.  Whether to keep the sample axis as singletons.
    name: Python `str` name prefixed to Ops created by this function.
          Default value: `None` (i.e., `'cholesky_covariance'`).

  Returns:
    chol:  `Tensor` of same `dtype` as `x`.  The last two dimensions hold
      lower triangular matrices (the Cholesky factors).
  """
    with tf.name_scope(name, 'cholesky_covariance', values=[x, sample_axis]):
        sample_axis = tf.convert_to_tensor(sample_axis, dtype=tf.int32)
        cov = covariance(x,
                         sample_axis=sample_axis,
                         event_axis=-1,
                         keepdims=keepdims)
        return tf.cholesky(cov)
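
# --- Illustrative sketch (not part of the example above). A plain NumPy version
# of the "Why does this work?" argument in the docstring: the Cholesky factor of
# the sample covariance reconstructs that covariance, and colouring iid noise
# with it gives data with (approximately) the same covariance.
import numpy as np

rng = np.random.RandomState(5)
true_L = np.array([[1.0, 0.0], [0.8, 0.6]])
observed_data = rng.randn(100000, 2) @ true_L.T     # samples with covariance true_L @ true_L.T

cov = np.cov(observed_data, rowvar=False)
L = np.linalg.cholesky(cov)
assert np.allclose(L @ L.T, cov)                    # L reconstructs the sample covariance

fake_data = rng.randn(100000, 2) @ L.T              # coloured iid noise
assert np.allclose(np.cov(fake_data, rowvar=False), cov, atol=2e-2)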
Example #47
0
def symmetric_log_det(x, name=None):
    """
    Compute the log determinant of a symmetric positive definite matrix.
    """
    chol = tf.cholesky(as_tensor(x))
    return cholesky_log_det(chol, name)
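
# --- Illustrative sketch (not part of the example above). For a symmetric
# positive-definite X with Cholesky factor L (X = L L^T),
#   log det(X) = 2 * sum(log(diag(L))),
# which is presumably what cholesky_log_det computes from `chol`. NumPy check:
import numpy as np

rng = np.random.RandomState(6)
M = rng.randn(4, 4)
X = M @ M.T + 4 * np.eye(4)
L = np.linalg.cholesky(X)
assert np.isclose(2.0 * np.sum(np.log(np.diag(L))), np.linalg.slogdet(X)[1])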
Example #48
0
print("# Convert to tensor")
ConvertToTensor = tf.convert_to_tensor(
    np.array([[1., 2.0, 3.0], [-3.0, -7.0, -1.], [0., 5.0, -2.]]))
print(sess.run(ConvertToTensor))

print("##Matric  Add Operation")
print(sess.run(TwotimeThreeMartix + TwotimeThreeConstantMartic))

print("##matrix Sub Operation")
print(sess.run(TwotimeThreeMartix - TwotimeThreeConstantMartic))

print("##Matrix Multiplication")
print(sess.run(tf.matmul(TwotimeThreeConstantMartic, identity_matrix)))

print("##martix Transpose")
print(sess.run(tf.transpose(TwotimeThreeConstantMartic)))

print("##Matrix Determinant")
print(sess.run(tf.matrix_determinant(ConvertToTensor)))

print("##Martic Inverse")
print(sess.run(tf.matrix_inverse(ConvertToTensor)))

print("## Cholesky Decomposition")
print(sess.run(tf.cholesky(identity_matrix)))

eigenvalue, eigenvectors = sess.run(tf.self_adjoint_eig(identity_matrix))
print("Value is", eigenvalue)
print("Vector is ", eigenvectors)
Example #49
0
# it seems like the test and training data need to have the same N
X_test, y_test = X_test[:-1, :], y_test[:-1]

# unfortunately not sure how to make the linear kernel work at this moment
N, P = X_train.shape
X_tf = tf.placeholder(tf.float32, [N, P])

# latent stochastic function
# ok so here in the loc position is where we can get (x *element-wise* b)
b = Bernoulli(varbvs_prior, dtype=np.float32)  # prior from varbvs
gp_mu = tf.reduce_mean(tf.multiply(X_tf, tf.reshape(tf.tile(b, [N]), [N, P])),
                       1)  # mean for prior over GP

f = MultivariateNormalTriL(
    loc=gp_mu,
    scale_tril=tf.cholesky(
        rbf(X_tf))  # uses rbf kernel for covariance of GP for now
)

qf = Normal(loc=tf.get_variable("qf/loc", [N]),
            scale=tf.nn.softplus(tf.get_variable("qf/scale", [N])))

# response
y_tf = Bernoulli(logits=f)

# inference
infer = ed.KLqp({f: qf}, data={X_tf: X_train, y_tf: y_train})
infer.run(n_samples=3, n_iter=5000)

# criticism
y_post = ed.copy(y_tf, {f: qf})
ed.evaluate('binary_accuracy', data={X_tf: X_test, y_post: y_test})
Example #50
0
def fully_correlated_conditional_repeat(Kmn, Kmm, Knn, f, *, full_cov=False, full_output_cov=False, q_sqrt=None,
                                        white=False):
    """
    This function handles conditioning of multi-output GPs in the case where the conditioning
    points are all fully correlated, in both the prior and posterior.

    Note: This conditional can handle 'repetitions' R, given in `f` and `q_sqrt`.

    :param Kmn: LM x N x P
    :param Kmm: LM x LM
    :param Knn: N x P or N x P x N x P
    :param f: data matrix, LM x R
    :param q_sqrt: R x LM x LM  or R x ML
    :param full_cov: calculate covariance between inputs
    :param full_output_cov: calculate covariance between outputs
    :param white: use whitened representation
    :return:
        - mean: R x N x P
        - variance: R x N x P, R x N x P x P, R x P x N x N, R x N x P x N x P
    """
    logger.debug("fully correlated conditional")
    R = tf.shape(f)[1]
    M, N, K = [tf.shape(Kmn)[i] for i in range(Kmn.shape.ndims)]
    Lm = tf.cholesky(Kmm)

    # Compute the projection matrix A
    # Lm: M x M    Kmn: M x NK
    Kmn = tf.reshape(Kmn, (M, N * K))  # M x NK
    A = tf.matrix_triangular_solve(Lm, Kmn, lower=True)  # M x NK
    Ar = tf.reshape(A, (M, N, K))

    # compute the covariance due to the conditioning
    if full_cov and full_output_cov:
        # fvar = Knn - tf.matmul(Ar, Ar, transpose_a=True)  # NK x NK, then reshape?
        fvar = Knn - tf.tensordot(Ar, Ar, [[0], [0]])  # N x K x N x K
    elif full_cov and not full_output_cov:
        At = tf.transpose(Ar)  # K x N x M
        fvar = Knn - tf.matmul(At, At, transpose_b=True)  # K x N x N
    elif not full_cov and full_output_cov:
        # This transpose is annoying
        At = tf.transpose(Ar, [1, 0, 2])  # N x M x K
        # fvar = Knn - tf.einsum('mnk,mnl->nkl', Ar, Ar)
        fvar = Knn - tf.matmul(At, At, transpose_a=True)  # N x K x K
    elif not full_cov and not full_output_cov:
        # Knn: N x K
        fvar = Knn - tf.reshape(tf.reduce_sum(tf.square(A), [0]), (N, K))  # Can also do this with a matmul

    # another backsubstitution in the unwhitened case
    if not white:
        # A = tf.matrix_triangular_solve(tf.matrix_transpose(Lm), A, lower=False)  # M x NK
        raise NotImplementedError("Need to verify this.")  # pragma: no cover

    # f: M x R
    fmean = tf.matmul(f, A, transpose_a=True)  # R x M  *  M x NK  ->  R x NK
    fmean = tf.reshape(fmean, (R, N, K))  # R x N x K

    if q_sqrt is not None:
        Lf = tf.matrix_band_part(q_sqrt, -1, 0)  # R x M x M
        if q_sqrt.get_shape().ndims == 3:
            A_tiled = tf.tile(A[None, :, :], tf.stack([R, 1, 1]))  # R x M x NK
            LTA = tf.matmul(Lf, A_tiled, transpose_a=True)  # R x M x NK
        elif q_sqrt.get_shape().ndims == 2:  # pragma: no cover
            raise NotImplementedError("Does not support diagonal q_sqrt yet...")
        else:  # pragma: no cover
            raise ValueError("Bad dimension for q_sqrt: %s" %
                             str(q_sqrt.get_shape().ndims))

        if full_cov and full_output_cov:
            addvar = tf.matmul(LTA, LTA, transpose_a=True)  # R x NK x NK
            fvar = fvar[None, :, :, :, :] + tf.reshape(addvar, (R, N, K, N, K))
        elif full_cov and not full_output_cov:
            LTAr = tf.transpose(tf.reshape(LTA, [R, M, N, K]), [0, 3, 1, 2])  # R x K x M x N
            addvar = tf.matmul(LTAr, LTAr, transpose_a=True)  # R x K x N x N
            fvar = fvar[None, ...] + addvar  # R x K x N x N
        elif not full_cov and full_output_cov:
            LTAr = tf.transpose(tf.reshape(LTA, (R, M, N, K)), [0, 2, 3, 1])  # R x N x K x M
            fvar = fvar[None, ...] + tf.matmul(LTAr, LTAr, transpose_b=True)  # R x N x K x K
        elif not full_cov and not full_output_cov:
            addvar = tf.reshape(tf.reduce_sum(tf.square(LTA), axis=1), (R, N, K))  # R x N x K
            fvar = fvar[None, ...] + addvar  # R x N x K
    else:
        fvar = tf.broadcast_to(fvar[None], tf.shape(fmean))
    return fmean, fvar
Example #51
0
def main():
    u.reset_timeit()

    iters = 11
    n = 10000

    print(f"Benchmarking n={n}")

    ############################################################
    # Numpy
    ############################################################
    A = scipy.randn(n, n)  # random matrix
    A = A @ A.T  # positive definite matrix
    A = scipy.linalg.cholesky(A)  # upper triangular factor
    b = scipy.randn(n)

    u.reset_timeit()
    for i in range(iters):
        with u.timeit('numpy'):
            scipy.linalg.solve_triangular(A, b)

    ############################################################
    # PyTorch GPU
    ############################################################
    A = torch.randn(n, n)
    A = A @ A.t() + torch.diag(torch.ones(n))
    A = torch.potrf(A).cuda()
    b = torch.randn(n, 1).cuda()

    # prewarm
    torch.trtrs(b, A)
    for i in range(iters):
        torch.cuda.synchronize()
        with u.timeit('Pytorch GPU'):
            result = torch.trtrs(b, A)
            torch.cuda.synchronize()
        del result

    ############################################################
    # PyTorch CPU
    ############################################################
    A = torch.randn(n, n)
    A = A @ A.t() + torch.diag(torch.ones(n))
    A = torch.potrf(A)
    b = torch.randn(n, 1)

    # prewarm
    (result, A_clone) = torch.trtrs(b, A)
    assert result.device.type == 'cpu'

    for i in range(iters):
        torch.cuda.synchronize()
        with u.timeit('Pytorch CPU'):
            result = torch.trtrs(b, A)
            torch.cuda.synchronize()
        del result

    ############################################################
    # PyTorch GPU
    ############################################################
    A = torch.randn(n, n)
    A = A @ A.t() + torch.diag(torch.ones(n))
    A = torch.potrf(A).cuda()
    b = torch.randn(n, 1).cuda()

    # prewarm
    (result, A_clone) = torch.trtrs(b, A)
    assert result.device.type == 'cuda'
    for i in range(iters):
        torch.cuda.synchronize()
        with u.timeit('Pytorch GPU'):
            (result, dummy) = torch.trtrs(b, A)
            print(result[0, 0])
            #      torch.cuda.synchronize()
        del result

    ############################################################
    # Tensorflow GPU
    ############################################################
    A = tf.random_normal((n, n)).gpu()
    b = tf.random_normal((n, 1)).gpu()
    A = A @ tf.transpose(A) + tf.diag(tf.ones(
        (n, )))  # bug, diag is needed, or Cholesky fails
    A = tf.cholesky(A)
    # bug, Should be able to do constant conversion, but fails with
    # Internal: failed to query device pointer for context: CUDA_ERROR_INVALID_VALUE
    #  A = tf.constant(A).gpu()
    #  b = tf.constant(b).gpu()

    # prewarm
    result = tf.contrib.eager.Variable(tf.zeros((n, 1)))
    result.assign(tf.linalg.triangular_solve(A, b))
    assert 'gpu' in result.device.lower()
    for i in range(iters):
        b += 1  # prevent caching
        with u.timeit('TF GPU'):
            result.assign(tf.linalg.triangular_solve(A, b))
            print(result[0, 0])

    ############################################################
    # Tensorflow CPU
    ############################################################
    A = tf.random_normal((n, n)).cpu()
    b = tf.random_normal((n, 1)).cpu()
    A = A @ tf.transpose(A) + tf.diag(tf.ones(
        (n, )))  # bug, diag is needed, or Cholesky fails
    A = tf.cholesky(A)
    A = A.cpu()
    b = b.cpu()

    # prewarm
    with tf.device('/cpu:0'):
        result = tf.contrib.eager.Variable(tf.zeros((n, 1)))
    result.assign(tf.linalg.triangular_solve(A, b))
    assert 'cpu' in result.device.lower()
    for i in range(iters):
        b += 1  # prevent caching
        with u.timeit('TF CPU'):
            result.assign(tf.linalg.triangular_solve(A, b))

    u.summarize_timeit()
Example #52
0
def get_sgpr_parameters(self):
    """Get parameters from a Gpflow Sparse Variational GP Regressor."""
    num_inducing_points = len(self.feature)

    # Reference
    # https://github.com/GPflow/GPflow/blob/develop/doc/source/notebooks/SGPR_notes.ipynb
    #
    # Predictive distribution
    # p(f*) = Normal(mean=K_{*u} L^{-T}L_B^{-T}c,
    #                cov=K_{**} - K_{*u} L^{-T} (I - B^{-1}) L^{-1} K_{u*})
    #
    # where
    # u: Inducing points
    # f: Data points
    # *: Prediction points
    #
    # Code based on SGPR._build_predict

    with tf.name_scope("Kuf"):
        # [NUM_INDUCING, NUM_DATA]
        Kuf = gpflow.features.Kuf(self.feature, self.kern, self.X)
    with tf.name_scope("Kuu"):
        # [NUM_INDUCING, NUM_INDUCING]
        Kuu = gpflow.features.Kuu(self.feature,
                                  self.kern,
                                  jitter=gpflow.settings.numerics.jitter_level)
    with tf.name_scope("sigma"):
        # []
        sigma = tf.sqrt(self.likelihood.variance)

    with tf.name_scope("eye"):
        # [NUM_INDUCING, NUM_INDUCING]
        eye = tf.eye(num_inducing_points, dtype=gpflow.settings.float_type)

    with tf.name_scope("L"):
        # [NUM_INDUCING, NUM_INDUCING]
        L = tf.cholesky(Kuu)
    with tf.name_scope("A"):
        # [NUM_INDUCING, NUM_DATA]
        A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
    with tf.name_scope("B"):
        # [NUM_INDUCING, NUM_INDUCING]
        B = tf.matmul(A, A, transpose_b=True) + eye

    with tf.name_scope("LB"):
        # [NUM_INDUCING, NUM_INDUCING]
        LB = tf.cholesky(B)

    with tf.name_scope("Ay"):
        # [NUM_INDUCING, OUT_DIM]
        Ay = tf.matmul(A, self.Y)
    with tf.name_scope("c"):
        # [NUM_INDUCING, OUT_DIM]
        c = tf.matrix_triangular_solve(LB, Ay, lower=True) / sigma
    with tf.name_scope("tmp1"):
        # [NUM_INDUCING, NUM_INDUCING]
        tmp1 = tf.matrix_triangular_solve(L, eye, lower=True)
    with tf.name_scope("tmp2"):
        # [NUM_INDUCING, NUM_INDUCING]
        tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
    with tf.name_scope("alpha"):
        # [NUM_INDUCING, OUT_DIM]
        alpha = tf.matmul(tmp2, c, transpose_a=True)

    return {
        "inducing_points": self.feature.Z,
        "coefficients": tf.matrix_transpose(alpha),
        "signal_variance": self.kern.variance[None],
        "length_scale": self.kern.lengthscales[None, :],
        "noise_variance": self.likelihood.variance[None],
        "gram_L": L[None, :, :],
        "B_L": LB[None, :, :],
    }
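
# --- Illustrative sketch (not part of the example above). The exported
# "coefficients" are alpha^T with alpha = L^{-T} L_B^{-T} c, so the predictive
# mean at new points X* is presumably K_{*u} @ alpha. The NumPy check below
# (with random stand-in matrices) confirms that this Cholesky pipeline equals
# the direct formula K_{*u} (Kuu + Kuf Kfu / sigma^2)^{-1} Kuf y / sigma^2.
import numpy as np
from scipy.linalg import cholesky, solve_triangular

rng = np.random.RandomState(7)
M, N, T = 4, 20, 3                      # inducing points, data points, test points
W = rng.randn(M, M)
Kuu = W @ W.T + M * np.eye(M)
Kuf, Ksu = rng.randn(M, N), rng.randn(T, M)
y, sigma = rng.randn(N, 1), 0.3

L = cholesky(Kuu, lower=True)
A = solve_triangular(L, Kuf, lower=True) / sigma
LB = cholesky(A @ A.T + np.eye(M), lower=True)
c = solve_triangular(LB, A @ y, lower=True) / sigma
alpha = solve_triangular(L.T, solve_triangular(LB.T, c, lower=False), lower=False)

mean_chol = Ksu @ alpha                 # K_{*u} L^{-T} L_B^{-T} c
mean_direct = Ksu @ np.linalg.solve(Kuu + Kuf @ Kuf.T / sigma ** 2, Kuf @ y) / sigma ** 2
assert np.allclose(mean_chol, mean_direct)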
Example #53
0
def draw_GP(Yi, Ti, Xi, ind_kfi, ind_kti, method, gp_params):
    """ 
    given GP hyperparams and data values at observation times, draw from 
    conditional GP
    
    inputs:
        length,noises,Lf,Kf: GP params
        Yi: observation values
        Ti: observation times
        Xi: grid points (new times for tcn)
        ind_kfi,ind_kti: indices into Y
    returns:
        draws from the GP at the evenly spaced grid times Xi, given hyperparams and data
    """
    n_mc_smps, length, noises, Lf, Kf = gp_params.n_mc_smps, gp_params.length, gp_params.noises, gp_params.Lf, gp_params.Kf
    M = gp_params.M
    ny = tf.shape(Yi)[0]
    K_tt = OU_kernel(length, Ti, Ti)
    D = tf.diag(noises)

    grid_f = tf.meshgrid(ind_kfi, ind_kfi)  #same as np.meshgrid
    Kf_big = tf.gather_nd(Kf, tf.stack((grid_f[0], grid_f[1]), -1))

    grid_t = tf.meshgrid(ind_kti, ind_kti)
    Kt_big = tf.gather_nd(K_tt, tf.stack((grid_t[0], grid_t[1]), -1))

    Kf_Ktt = tf.multiply(Kf_big, Kt_big)

    DI_big = tf.gather_nd(D, tf.stack((grid_f[0], grid_f[1]), -1))
    DI = tf.diag(tf.diag_part(DI_big))  #D kron I

    #data covariance.
    #Either need to take Cholesky of this or use CG / block CG for matrix-vector products
    Ky = Kf_Ktt + DI + method.add_diag * tf.eye(ny)

    ### build out cross-covariances and covariance at grid

    nx = tf.shape(Xi)[0]

    K_xx = OU_kernel(length, Xi, Xi)
    K_xt = OU_kernel(length, Xi, Ti)

    ind = tf.concat([tf.tile([i], [nx]) for i in range(M)], 0)
    grid = tf.meshgrid(ind, ind)
    Kf_big = tf.gather_nd(Kf, tf.stack((grid[0], grid[1]), -1))
    ind2 = tf.tile(tf.range(nx), [M])
    grid2 = tf.meshgrid(ind2, ind2)
    Kxx_big = tf.gather_nd(K_xx, tf.stack((grid2[0], grid2[1]), -1))

    K_ff = tf.multiply(Kf_big, Kxx_big)  #cov at grid points

    full_f = tf.concat([tf.tile([i], [nx]) for i in range(M)], 0)
    grid_1 = tf.meshgrid(full_f, ind_kfi, indexing='ij')
    Kf_big = tf.gather_nd(Kf, tf.stack((grid_1[0], grid_1[1]), -1))
    full_x = tf.tile(tf.range(nx), [M])
    grid_2 = tf.meshgrid(full_x, ind_kti, indexing='ij')
    Kxt_big = tf.gather_nd(K_xt, tf.stack((grid_2[0], grid_2[1]), -1))

    K_fy = tf.multiply(Kf_big, Kxt_big)

    #now get draws!
    y_ = tf.reshape(Yi, [-1, 1])

    xi = tf.random_normal((nx * M, n_mc_smps))
    #print('xi shape:')
    #print(xi.shape)

    if method.methodname == 'chol':
        Ly = tf.cholesky(Ky)
        Mu = tf.matmul(K_fy, tf.cholesky_solve(Ly, y_))
        Sigma = K_ff - tf.matmul(K_fy, tf.cholesky_solve(
            Ly,
            tf.transpose(K_fy))) + method.add_diag * tf.eye(tf.shape(K_ff)[0])
        #Exp2: increase noise on Sigma 1e-6 to 1e-3, to 1e-1?
        #Sigma = tf.cast(Sigma, tf.float64) ## Experiment: is chol instable and needs float64? Will this crash Memory?
        #draw = Mu + tf.matmul(tf.cast(tf.cholesky(Sigma),tf.float32),xi)
        draw = Mu + tf.matmul(tf.cholesky(Sigma), xi)
        draw_reshape = tf.transpose(tf.reshape(tf.transpose(draw),
                                               [n_mc_smps, M, nx]),
                                    perm=[0, 2, 1])
        #print('cholesky draw:')
        #print(sess.run(draw_reshape))

    elif method.methodname == 'cg':
        Mu = tf.matmul(K_fy, CG(Ky,
                                y_))  #May be faster with CG for large problems

        #Never need to explicitly compute Sigma! Just need matrix products with Sigma in Lanczos algorithm
        def Sigma_mul(vec):
            # vec must be a 2d tensor, shape (?,?)
            return tf.matmul(K_ff, vec) - tf.matmul(
                K_fy, block_CG(Ky, tf.matmul(tf.transpose(K_fy), vec)))

        def large_draw():
            return Mu + block_Lanczos(
                Sigma_mul, xi, n_mc_smps)  #no need to explicitly reshape Mu

        #draw = tf.cond(tf.less(nx*M,BLOCK_LANC_THRESH),small_draw,large_draw)
        draw = large_draw()
        draw_reshape = tf.transpose(tf.reshape(tf.transpose(draw),
                                               [n_mc_smps, M, nx]),
                                    perm=[0, 2, 1])
        #print('cg draw shape:')
        #print(draw_reshape.shape)

    return draw_reshape
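
# --- Illustrative sketch (not part of the example above). The 'chol' branch is
# the textbook conditional Gaussian:
#   Mu    = K_fy Ky^{-1} y
#   Sigma = K_ff - K_fy Ky^{-1} K_yf
#   draw  = Mu + chol(Sigma + jitter * I) @ xi,   xi ~ N(0, I)
# A small NumPy version; the exponential (OU-style) kernel and all sizes below
# are made up for this sketch.
import numpy as np

def ou_kernel_np(length, s, t):
    return np.exp(-np.abs(s[:, None] - t[None, :]) / length)

rng = np.random.RandomState(8)
Ti = np.sort(rng.rand(10))              # observation times
Xi = np.linspace(0.0, 1.0, 25)          # grid times
y = rng.randn(10, 1)
noise, jitter = 0.1, 1e-6

Ky = ou_kernel_np(1.0, Ti, Ti) + noise * np.eye(10)
K_fy = ou_kernel_np(1.0, Xi, Ti)
K_ff = ou_kernel_np(1.0, Xi, Xi)

Mu = K_fy @ np.linalg.solve(Ky, y)
Sigma = K_ff - K_fy @ np.linalg.solve(Ky, K_fy.T) + jitter * np.eye(25)
draw = Mu + np.linalg.cholesky(Sigma) @ rng.randn(25, 1)   # one posterior sample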
Example #54
0
def conditional(Xnew, X, kern, f, full_cov=False, q_sqrt=None, whiten=False):
    """
    Given F, representing the GP at the points X, produce the mean and
    (co-)variance of the GP at the points Xnew.

    Additionally, there may be Gaussian uncertainty about F as represented by
    q_sqrt. In this case `f` represents the mean of the distribution and
    q_sqrt the square-root of the covariance.

    Additionally, the GP may have been centered (whitened) so that
        p(v) = N( 0, I)
        f = L v
    thus
        p(f) = N(0, LL^T) = N(0, K).
    In this case 'f' represents the values taken by v.

    The method can either return the diagonals of the covariance matrix for
    each output, or the full covariance matrix (full_cov).

    We assume K independent GPs, represented by the columns of f (and the
    last dimension of q_sqrt).

     - Xnew is a data matrix, size N x D
     - X are data points, size M x D
     - kern is a GPflow kernel
     - f is a data matrix, M x K, representing the function values at X, for K functions.
     - q_sqrt (optional) is a matrix of standard-deviations or Cholesky
       matrices, size M x K or M x M x K
     - whiten (optional) is a boolean: whether to whiten the representation
       as described above.

    These functions are now considered deprecated, subsumed into this one:
        gp_predict
        gaussian_gp_predict
        gp_predict_whitened
        gaussian_gp_predict_whitened

    """

    # compute kernel stuff
    num_data = tf.shape(X)[0]
    Kmn = kern.K(X, Xnew)
    Kmm = kern.K(X) + eye(num_data) * 1e-6
    Lm = tf.cholesky(Kmm)

    # Compute the projection matrix A
    A = tf.matrix_triangular_solve(Lm, Kmn, lower=True)

    # compute the covariance due to the conditioning
    if full_cov:
        fvar = kern.K(Xnew) - tf.matmul(A, A, transpose_a=True)
        shape = tf.pack([tf.shape(f)[1], 1, 1])
    else:
        fvar = kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
        shape = tf.pack([tf.shape(f)[1], 1])
    fvar = tf.tile(tf.expand_dims(fvar, 0), shape)  # D x N x N or D x N

    # another backsubstitution in the unwhitened case
    if not whiten:
        A = tf.matrix_triangular_solve(tf.transpose(Lm), A, lower=False)

    # construct the conditional mean
    fmean = tf.matmul(tf.transpose(A), f)

    if q_sqrt is not None:
        if q_sqrt.get_shape().ndims == 2:
            LTA = A * tf.expand_dims(tf.transpose(q_sqrt), 2)  # D x M x N
        elif q_sqrt.get_shape().ndims == 3:
            L = tf.batch_matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1,
                                          0)  # D x M x M
            A_tiled = tf.tile(tf.expand_dims(A, 0),
                              tf.pack([tf.shape(f)[1], 1, 1]))
            LTA = tf.batch_matmul(L, A_tiled, adj_x=True)  # D x M x N
        else:  # pragma: no cover
            raise ValueError("Bad dimension for q_sqrt: %s" %
                             str(q_sqrt.get_shape().ndims))
        if full_cov:
            fvar = fvar + tf.batch_matmul(LTA, LTA, adj_x=True)  # D x N x N
        else:
            fvar = fvar + tf.reduce_sum(tf.square(LTA), 1)  # D x N
    fvar = tf.transpose(fvar)  # N x D or N x N x D

    return fmean, fvar
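
# --- Illustrative sketch (not part of the example above). In the unwhitened
# branch the two triangular solves turn A into Kmm^{-1} Kmn, so the conditional
# mean is Kmn^T Kmm^{-1} f. A NumPy check with random stand-in matrices:
import numpy as np
from scipy.linalg import cholesky, solve_triangular

rng = np.random.RandomState(9)
M, N, K = 5, 7, 2
W = rng.randn(M, M)
Kmm = W @ W.T + M * np.eye(M)
Kmn = rng.randn(M, N)
f = rng.randn(M, K)

Lm = cholesky(Kmm, lower=True)
A = solve_triangular(Lm, Kmn, lower=True)      # Lm^{-1} Kmn
A = solve_triangular(Lm.T, A, lower=False)     # Lm^{-T} Lm^{-1} Kmn = Kmm^{-1} Kmn
fmean = A.T @ f
assert np.allclose(fmean, Kmn.T @ np.linalg.solve(Kmm, f))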
Example #55
0
    def backward_tensor(self, y):
        return tf.cholesky(y)
Example #56
0
def base_conditional(Kmn, Kmm, Knn, f, *, full_cov=False, q_sqrt=None, white=False):
    """
    Given a g1 and g2, and distribution p and q such that
      p(g2) = N(g2;0,Kmm)
      p(g1) = N(g1;0,Knn)
      p(g1|g2) = N(g1; Knm Kmm^{-1} g2, Knn - Knm Kmm^{-1} Kmn)
    And
      q(g2) = N(g2;f,q_sqrt*q_sqrt^T)
    This method computes the mean and (co)variance of
      q(g1) = \int q(g2) p(g1|g2)
    :param Kmn: M x N
    :param Kmm: M x M
    :param Knn: N x N  or  N
    :param f: M x R
    :param full_cov: bool
    :param q_sqrt: None or R x M x M (lower triangular)
    :param white: bool
    :return: N x R  or R x N x N
    """

    logger.debug("base conditional")
    # compute kernel stuff
    num_func = tf.shape(f)[1]  # R
    Lm = tf.cholesky(Kmm)

    # Compute the projection matrix A
    A = tf.matrix_triangular_solve(Lm, Kmn, lower=True)

    # compute the covariance due to the conditioning
    if full_cov:
        fvar = Knn - tf.matmul(A, A, transpose_a=True)
        fvar = tf.tile(fvar[None, :, :], [num_func, 1, 1])  # R x N x N
    else:
        fvar = Knn - tf.reduce_sum(tf.square(A), 0)
        fvar = tf.tile(fvar[None, :], [num_func, 1])  # R x N

    # another backsubstitution in the unwhitened case
    if not white:
        A = tf.matrix_triangular_solve(tf.transpose(Lm), A, lower=False)

    # construct the conditional mean
    fmean = tf.matmul(A, f, transpose_a=True)

    if q_sqrt is not None:
        if q_sqrt.get_shape().ndims == 2:
            LTA = A * tf.expand_dims(tf.transpose(q_sqrt), 2)  # R x M x N
        elif q_sqrt.get_shape().ndims == 3:
            L = tf.matrix_band_part(q_sqrt, -1, 0)  # R x M x M
            A_tiled = tf.tile(tf.expand_dims(A, 0), tf.stack([num_func, 1, 1]))
            LTA = tf.matmul(L, A_tiled, transpose_a=True)  # R x M x N
        else:  # pragma: no cover
            raise ValueError("Bad dimension for q_sqrt: %s" %
                             str(q_sqrt.get_shape().ndims))
        if full_cov:
            fvar = fvar + tf.matmul(LTA, LTA, transpose_a=True)  # R x N x N
        else:
            fvar = fvar + tf.reduce_sum(tf.square(LTA), 1)  # R x N

    if not full_cov:
        fvar = tf.transpose(fvar)  # N x R

    return fmean, fvar  # fmean: N x R;  fvar: R x N x N (full_cov) or N x R
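To make the q_sqrt propagation above concrete, here is a small NumPy sketch (toy values, a single output R = 1, white=False, full_cov=True) of the same quantities computed by base_conditional:

import numpy as np
from scipy.linalg import cholesky, solve_triangular

rng = np.random.default_rng(0)
M, N = 3, 2
Z = rng.normal(size=(M, 1))                      # "inducing" inputs
Xs = rng.normal(size=(N, 1))                     # test inputs
k = lambda A, B: np.exp(-0.5 * (A - B.T) ** 2)   # toy 1-D RBF kernel
Kmm = k(Z, Z) + 1e-6 * np.eye(M)
Kmn = k(Z, Xs)
Knn = k(Xs, Xs)
f = rng.normal(size=(M, 1))                      # mean of q(g2)
q_sqrt = np.tril(rng.normal(size=(M, M)))        # lower-triangular sqrt of cov(q(g2))

Lm = cholesky(Kmm, lower=True)
A = solve_triangular(Lm, Kmn, lower=True)
fvar = Knn - A.T @ A                             # conditioning term
A = solve_triangular(Lm.T, A, lower=False)       # extra backsubstitution, white=False
fmean = A.T @ f                                  # N x 1
LTA = q_sqrt.T @ A                               # M x N
fvar = fvar + LTA.T @ LTA                        # adds A^T (q_sqrt q_sqrt^T) A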
Example #57
0
  def _encode(self, tensor_dict: Dict[bytes, tf.Tensor]) -> Dict[str, tf.Tensor]:
    """Encode an input tensor based on the DKF model.

    Args:
      tensor_dict: A dict of tensors.

    Returns:
      A dict of network output tensors.
    """
    train_and_predict_tensors = self._get_train_and_predict_tensors(
        tensor_dict, self._config.sys_id_len)
    (obs_train_tensor, obs_train_mask_tensor, intervention_train_tensor,
     obs_to_trigger_tensor, obs_to_trigger_mask_tensor,
     intervention_to_trigger_tensor,
     biomarker_boolean_mask_tensor) = train_and_predict_tensors

    batch_size = obs_train_tensor.get_shape().as_list()[0]

    states = self.deep_smooth(obs_train_tensor, intervention_train_tensor,
                              obs_train_mask_tensor)

    state_for_prediction = self.deep_smooth(obs_to_trigger_tensor,
                                            intervention_to_trigger_tensor,
                                            obs_to_trigger_mask_tensor)

    # mu_smooth shape [bs, tlen, _z_dim]
    mu_smooth = states[0]
    mu_prediction = state_for_prediction[0]

    # sigma_smooth shape [bs, tlen, _z_dim, _z_dim]
    sigma_smooth = states[1]
    sigma_prediction = state_for_prediction[1]

    # Sample from smoothing distribution
    if self._config.use_jitter:
      jitter = 1e-2 * tf.eye(
          tf.shape(sigma_smooth)[-1], batch_shape=tf.shape(sigma_smooth)[0:-2])
      mvn_smooth = tfp.distributions.MultivariateNormalTriL(
          mu_smooth, sigma_smooth + jitter)
    else:
      mvn_smooth = tfp.distributions.MultivariateNormalTriL(
          mu_smooth, sigma_smooth)

    # Note: the following alternative is numerically unstable because of the cholesky op.
    # mvn_smooth = MultivariateNormalTriL(mu_smooth, tf.cholesky(Sigma_smooth))
    # z_smooth shape [bs, tlen, _z_dim];
    z_smooth = mvn_smooth.sample()

    # Transition distribution \prod_{t=2}^T p(z_t|z_{t-1}, u_{t})
    # We use tm1 to denote t-1;
    # state_tran_z_tm1 to denote state_tran(z_{t-1}).
    # control_tran_u_t to denote control_tran(u_t).
    # We need to evaluate N(z_t; state_tran_z_tm1 + control_tran_u_t, Q)
    # Roll left to remove the first input
    # intervention_tensor: [bs, tlen, _u_dim]
    z_tm1 = z_smooth[:, :-1, :]
    u_t = intervention_train_tensor[:, 1:, :]
    tf.logging.info(u_t)
    # mu_transition shape [bs * (tlen - 1), _z_dim]
    mu_transition = tf.reshape(
        self.state_tran(z_tm1) + self.control_tran(u_t), [-1, self._z_dim])

    # z_t_transition [bs * (tlen - 1), _z_dim]
    z_t_transition = tf.reshape(z_smooth[:, 1:, :], [-1, self._z_dim])

    # We transform the rand var to be zero-mean:
    # N(z_t; Az_tm1 + Bu_t, Q) as N(z_t - Az_tm1 - Bu_t; 0, Q)
    trans_centered = z_t_transition - mu_transition
    # mvn_transition [bs * (tlen - 1), self._z_dim]
    mvn_transition = MultivariateNormalTriL(
        tf.zeros(self._z_dim), tf.cholesky(self.state_noise))
    # log_prob_transition [bs * (tlen - 1)]
    log_prob_transition = mvn_transition.log_prob(trans_centered)

    ## Emission distribution \prod_{t=1}^T p(obs_t|z_t)
    # We need to evaluate N(y_t; Cz_t, R). We write it as N(y_t - Cz_t; 0, R)
    # z_smooth shape [bs, tlen, z_dim];
    # self.obs_emission shape [a_dim, z_dim];
    # obs_emission_z_t shape [bs, tlen, _a_dim]
    obs_emission_z_t = self.obs_emission(z_smooth)
    obs_emission_z_t_resh = tf.reshape(obs_emission_z_t,
                                       [-1, self._out_obs_dim])

    # observation tensor reshaped.
    tf.logging.info(biomarker_boolean_mask_tensor)  # [num_obs]
    tf.logging.info(obs_train_tensor)  # [bs, tlen, num_obs]
    y_t_resh = tf.reshape(
        tf.transpose(
            tf.boolean_mask(
                tf.transpose(obs_train_tensor, [2, 0, 1]),
                biomarker_boolean_mask_tensor), [1, 2, 0]),
        [-1, self._out_obs_dim])
    emiss_centered = y_t_resh - obs_emission_z_t_resh
    mask_flat = tf.reshape(
        tf.transpose(
            tf.boolean_mask(
                tf.transpose(obs_train_mask_tensor, [2, 0, 1]),
                biomarker_boolean_mask_tensor), [1, 2, 0]),
        [-1, self._out_obs_dim])
    # set missing obs emission center to be zero.
    # emiss_centered shape [bs * tlen, _a_dim]
    emiss_centered = tf.multiply(mask_flat, emiss_centered)

    mvn_emission = MultivariateNormalTriL(
        tf.zeros(self._out_obs_dim), tf.cholesky(self.obs_noise))

    # log_prob_emission shape [bs * tlen].
    log_prob_emission = mvn_emission.log_prob(emiss_centered)

    if self._config.pretrain_interv:
      # Intervention distribution \prod_{t=0}^{T-1} p(interv_{t+1}|z_t)
      interv_forecast_z_t = self.interv_forecast(z_tm1)
      interv_forecast_z_t_resh = tf.reshape(interv_forecast_z_t,
                                            [-1, self._u_dim])
      u_t_resh = tf.reshape(u_t, [-1, self._u_dim])

      interv_centered = u_t_resh - interv_forecast_z_t_resh
      mvn_interv = MultivariateNormalTriL(
          tf.zeros(self._u_dim), tf.cholesky(self.interv_noise))

      # log_prob_interv shape [bs * (tlen - 1)].
      log_prob_interv = mvn_interv.log_prob(interv_centered)

    ## Distribution of the initial state p(z_1|z_0)
    z_0 = z_smooth[:, 0, :]
    init_mu = tf.zeros([batch_size, self._z_dim])
    init_sigma = tf.reshape(
        tf.tile(
            tf.eye(self._z_dim, num_columns=self._z_dim),
            tf.constant([batch_size, 1])),
        [batch_size, self._z_dim, self._z_dim])
    mvn_0 = MultivariateNormalTriL(init_mu, tf.cholesky(init_sigma))
    log_prob_0 = mvn_0.log_prob(z_0)

    # Entropy log(\prod_{t=1}^T p(z_t|y_{1:T}, u_{1:T}))
    entropy = -mvn_smooth.log_prob(z_smooth)
    entropy = tf.reshape(entropy, [-1])
    # entropy = tf.zeros(())

    log_probs = [
        tf.reduce_mean(log_prob_transition),
        tf.reduce_mean(log_prob_emission),
        tf.reduce_mean(log_prob_0),
        tf.reduce_mean(entropy)
    ]
    if self._config.pretrain_interv:
      log_probs = log_probs + [tf.reduce_mean(log_prob_interv)]

    kf_elbo = tf.reduce_sum(log_probs)

    state_loss = [
        tf.reduce_mean(log_prob_transition),
        tf.reduce_mean(log_prob_0),
        tf.reduce_mean(entropy)
    ]
    state_only_loss = tf.reduce_sum(state_loss)

    output = dict()
    # loss and obs prediction.
    if self._config.sys_id_len > 0:
      tlen = self._config.sys_id_len
    else:
      tlen = self._config.context_window_size
    # obs_est starting from t=2
    # obs_est only for output prediction, not used for loss computation.
    output['obs_est'] = tf.reshape(obs_emission_z_t,
                                   [-1, tlen, self._out_obs_dim])

    # mu_smooth shape [bs, tlen, z_dim];
    # final state_encoding shape [bs, z_dim]
    output['state_encoding'] = mu_prediction[:, -1, :]

    # final state_traj_encoding shape [bs, tlen, z_dim]
    output['state_traj_encoding'] = mu_prediction[:, :, :]

    # full_state_encoding carries mu_smooth[:, -1, :] and
    # sigma_smooth [:, -1, :, :] to reconstruct the full distribution.
    # Its shape is [bs, z_dim, z_dim + 1]
    output['full_state_encoding'] = tf.concat([
        tf.expand_dims(mu_prediction[:, -1, :], axis=-1),
        sigma_prediction[:, -1, :, :]
    ],
                                              axis=2)
    if self._config.state_only_loss:
      output['loss'] = -state_only_loss
    else:
      output['loss'] = -kf_elbo
    # output['last_obs']  shape [bs, _out_obs_dim]
    output['last_obs'] = tf.squeeze(
        tf.slice(obs_to_trigger_tensor,
                 [0, self._config.context_len_to_trigger - 1, 0], [-1, 1, -1]))
    if self._config.forecast_biomarkers:
      # switch shape to [_out_obs_dim, bs] for applying mask.
      output['last_obs'] = tf.boolean_mask(
          tf.transpose(output['last_obs']), biomarker_boolean_mask_tensor)
      # transpose shape back.
      output['last_obs'] = tf.transpose(output['last_obs'])

    output['state_loss'] = state_loss

    tf.logging.info(output)
    return output
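The transition and emission terms above both follow the same pattern: center the sample around its predicted mean and score the residual under a zero-mean MultivariateNormalTriL whose scale is the Cholesky of the noise covariance. A minimal standalone sketch of that pattern (TF 2.x / TFP style, shapes and values illustrative only):

import tensorflow as tf
import tensorflow_probability as tfp

z_dim, batch = 3, 8
z_t = tf.random.normal([batch, z_dim])              # sampled latent at time t
z_pred = tf.random.normal([batch, z_dim])           # e.g. A z_{t-1} + B u_t
Q = 2.0 * tf.eye(z_dim)                             # transition noise covariance
mvn = tfp.distributions.MultivariateNormalTriL(
    loc=tf.zeros(z_dim), scale_tril=tf.linalg.cholesky(Q))
log_prob_transition = mvn.log_prob(z_t - z_pred)    # shape [batch]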
Example #58
0
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

sess = tf.Session()
x_vals = np.linspace(0, 10, 100)
y_vals = x_vals + np.random.normal(0, 1, 100)
x_vals_column = np.transpose(np.matrix(x_vals))
ones_column = np.transpose(np.matrix(np.repeat(1, 100)))
A = np.column_stack((x_vals_column, ones_column))
b = np.transpose(np.matrix(y_vals))
A_tensor = tf.constant(A)
b_tensor = tf.constant(b)

A_A = tf.matmul(tf.transpose(A_tensor), A_tensor)
L = tf.cholesky(A_A)
A_b = tf.matmul(tf.transpose(A_tensor), b)

sol1 = tf.matrix_solve(L, A_b)
sol2 = tf.matrix_solve(tf.transpose(L), sol1)

solution_eval = sess.run(sol2)
slope = solution_eval[0][0]
y_intercept = solution_eval[1][0]

print('slope: ' + str(slope))
print('y_intercept: ' + str(y_intercept))

best_fit = []
for i in x_vals:
    best_fit.append(slope * i + y_intercept)
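Since L is lower triangular, the two tf.matrix_solve calls above can be replaced by triangular solves (or a single tf.cholesky_solve). A short standalone sketch under the same TF 1.x API, with the same synthetic data setup:

import numpy as np
import tensorflow as tf

sess = tf.Session()
A_np = np.column_stack((np.linspace(0, 10, 100), np.ones(100)))
b_np = A_np[:, :1] + np.random.normal(0, 1, (100, 1))
AtA = tf.matmul(tf.transpose(tf.constant(A_np)), tf.constant(A_np))
Atb = tf.matmul(tf.transpose(tf.constant(A_np)), tf.constant(b_np))
L = tf.cholesky(AtA)
y = tf.matrix_triangular_solve(L, Atb, lower=True)                # solve L y = A^T b
x = tf.matrix_triangular_solve(tf.transpose(L), y, lower=False)   # solve L^T x = y
# tf.cholesky_solve(L, Atb) returns the same x in one call.
print(sess.run(x))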
Example #59
0
  def __init__(self,
               loc=None,
               covariance_matrix=None,
               validate_args=False,
               allow_nan_stats=True,
               name="MultivariateNormalFullCovariance"):
    """Construct Multivariate Normal distribution on `R^k`.

    The `batch_shape` is the broadcast shape between `loc` and
    `covariance_matrix` arguments.

    The `event_shape` is given by last dimension of the matrix implied by
    `covariance_matrix`. The last dimension of `loc` (if provided) must
    broadcast with this.

    A non-batch `covariance_matrix` is a `k x k` symmetric positive definite
    matrix, i.e., (real) symmetric with all eigenvalues strictly positive.

    Additional leading dimensions (if any) will index batches.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
        `b >= 0` and `k` is the event size.
      covariance_matrix: Floating-point, symmetric positive definite `Tensor` of
        same `dtype` as `loc`.  The strict upper triangle of `covariance_matrix`
        is ignored, so if `covariance_matrix` is not symmetric no error will be
        raised (unless `validate_args is True`).  `covariance_matrix` has shape
        `[B1, ..., Bb, k, k]` where `b >= 0` and `k` is the event size.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      ValueError: if neither `loc` nor `covariance_matrix` are specified.
    """
    parameters = dict(locals())

    # Convert the covariance_matrix up to a scale_tril and call MVNTriL.
    with tf.name_scope(name) as name:
      with tf.name_scope("init", values=[loc, covariance_matrix]):
        dtype = dtype_util.common_dtype([loc, covariance_matrix], tf.float32)
        loc = loc if loc is None else tf.convert_to_tensor(
            loc, name="loc", dtype=dtype)
        if covariance_matrix is None:
          scale_tril = None
        else:
          covariance_matrix = tf.convert_to_tensor(
              covariance_matrix, name="covariance_matrix", dtype=dtype)
          if validate_args:
            covariance_matrix = control_flow_ops.with_dependencies([
                tf.assert_near(
                    covariance_matrix,
                    tf.matrix_transpose(covariance_matrix),
                    message="Matrix was not symmetric")
            ], covariance_matrix)
          # No need to validate that covariance_matrix is non-singular.
          # LinearOperatorLowerTriangular has an assert_non_singular method that
          # is called by the Bijector.
          # However, cholesky() ignores the upper triangular part, so we do need
          # to separately assert symmetric.
          scale_tril = tf.cholesky(covariance_matrix)
        super(MultivariateNormalFullCovariance, self).__init__(
            loc=loc,
            scale_tril=scale_tril,
            validate_args=validate_args,
            allow_nan_stats=allow_nan_stats,
            name=name)
    self._parameters = parameters
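A small usage sketch (illustrative values): the covariance matrix is turned into scale_tril via the Cholesky factorization exactly as in the constructor above, and the resulting object behaves like any MVN distribution.

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

cov = np.array([[2.0, 0.3],
                [0.3, 1.0]], dtype=np.float32)      # symmetric positive definite
mvn = tfp.distributions.MultivariateNormalFullCovariance(
    loc=[0.0, 1.0], covariance_matrix=cov)
samples = mvn.sample(5)            # shape [5, 2]
log_probs = mvn.log_prob(samples)  # shape [5]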
Example #60
0
def draw_GP(Yi, Ti, Xi, ind_kfi, ind_kti):
    """
    Given GP hyperparameters and data values at the observation times, draw
    from the conditional GP at the grid times.

    Inputs:
        length, noises, Lf, Kf: GP params (taken from the enclosing scope,
            not passed as arguments)
        Yi: observation values
        Ti: observation times
        Xi: grid points (new times for the RNN)
        ind_kfi, ind_kti: indices into Y
    Returns:
        draws from the GP at the evenly spaced grid times Xi, given the
        hyperparameters and data
    """
    ny = tf.shape(Yi)[0]
    K_tt = OU_kernel(length, Ti, Ti)
    D = tf.diag(noises)

    grid_f = tf.meshgrid(ind_kfi, ind_kfi)  #same as np.meshgrid
    Kf_big = tf.gather_nd(Kf, tf.stack((grid_f[0], grid_f[1]), -1))

    grid_t = tf.meshgrid(ind_kti, ind_kti)
    Kt_big = tf.gather_nd(K_tt, tf.stack((grid_t[0], grid_t[1]), -1))

    Kf_Ktt = tf.multiply(Kf_big, Kt_big)

    DI_big = tf.gather_nd(D, tf.stack((grid_f[0], grid_f[1]), -1))
    DI = tf.diag(tf.diag_part(DI_big))  #D kron I

    #data covariance.
    #Either need to take Cholesky of this or use CG / block CG for matrix-vector products
    Ky = Kf_Ktt + DI + 1e-6 * tf.eye(ny)

    ### build out cross-covariances and covariance at grid

    nx = tf.shape(Xi)[0]

    K_xx = OU_kernel(length, Xi, Xi)
    K_xt = OU_kernel(length, Xi, Ti)

    ind = tf.concat([tf.tile([i], [nx]) for i in range(M)], 0)
    grid = tf.meshgrid(ind, ind)
    Kf_big = tf.gather_nd(Kf, tf.stack((grid[0], grid[1]), -1))
    ind2 = tf.tile(tf.range(nx), [M])
    grid2 = tf.meshgrid(ind2, ind2)
    Kxx_big = tf.gather_nd(K_xx, tf.stack((grid2[0], grid2[1]), -1))

    K_ff = tf.multiply(Kf_big, Kxx_big)  #cov at grid points

    full_f = tf.concat([tf.tile([i], [nx]) for i in range(M)], 0)
    grid_1 = tf.meshgrid(full_f, ind_kfi, indexing='ij')
    Kf_big = tf.gather_nd(Kf, tf.stack((grid_1[0], grid_1[1]), -1))
    full_x = tf.tile(tf.range(nx), [M])
    grid_2 = tf.meshgrid(full_x, ind_kti, indexing='ij')
    Kxt_big = tf.gather_nd(K_xt, tf.stack((grid_2[0], grid_2[1]), -1))

    K_fy = tf.multiply(Kf_big, Kxt_big)

    #now get draws!
    y_ = tf.reshape(Yi, [-1, 1])
    #Mu = tf.matmul(K_fy,CG(Ky,y_)) #May be faster with CG for large problems
    Ly = tf.cholesky(Ky)
    Mu = tf.matmul(K_fy, tf.cholesky_solve(Ly, y_))

    #TODO: it's worth testing to see at what point computation speedup of Lanczos algorithm is useful & needed.
    # For smaller examples, using Cholesky will probably be faster than this unoptimized Lanczos implementation.
    # Likewise for CG and BCG vs just taking the Cholesky of Ky once
    """
    #Never need to explicitly compute Sigma! Just need matrix products with Sigma in Lanczos algorithm
    def Sigma_mul(vec):
        # vec must be a 2d tensor, shape (?,?) 
        return tf.matmul(K_ff,vec) - tf.matmul(K_fy,block_CG(Ky,tf.matmul(tf.transpose(K_fy),vec))) 
    
    def small_draw():   
        return Mu + tf.matmul(tf.cholesky(Sigma),xi)
    def large_draw():             
        return Mu + block_Lanczos(Sigma_mul,xi,n_mc_smps) #no need to explicitly reshape Mu
    
    BLOCK_LANC_THRESH = 1000
    draw = tf.cond(tf.less(nx*M,BLOCK_LANC_THRESH),small_draw,large_draw)     
    """

    xi = tf.random_normal((nx * M, n_mc_smps))
    Sigma = K_ff - tf.matmul(K_fy, tf.cholesky_solve(
        Ly, tf.transpose(K_fy))) + 1e-6 * tf.eye(tf.shape(K_ff)[0])
    draw = Mu + tf.matmul(tf.cholesky(Sigma), xi)
    draw_reshape = tf.transpose(tf.reshape(tf.transpose(draw),
                                           [n_mc_smps, M, nx]),
                                perm=[0, 2, 1])
    return draw_reshape
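Stripped of the Kronecker and indexing machinery, the draw above is ordinary GP conditioning: Mu = K_fy Ky^{-1} y and Sigma = K_ff - K_fy Ky^{-1} K_fy^T, followed by Mu + chol(Sigma) xi. A single-task NumPy sketch under an illustrative OU kernel and noise level:

import numpy as np

def ou_kernel(t1, t2, length=1.0):
    return np.exp(-np.abs(t1[:, None] - t2[None, :]) / length)

rng = np.random.default_rng(0)
Ti = np.sort(rng.uniform(0, 10, 20))                  # observation times
Yi = np.sin(Ti) + 0.1 * rng.normal(size=20)           # observation values
Xi = np.linspace(0, 10, 50)                           # grid times

Ky = ou_kernel(Ti, Ti) + 0.1 * np.eye(20)             # data covariance (incl. noise)
K_fy = ou_kernel(Xi, Ti)
K_ff = ou_kernel(Xi, Xi)

Ly = np.linalg.cholesky(Ky)
alpha = np.linalg.solve(Ly.T, np.linalg.solve(Ly, Yi))
Mu = K_fy @ alpha
Sigma = (K_ff - K_fy @ np.linalg.solve(Ly.T, np.linalg.solve(Ly, K_fy.T))
         + 1e-6 * np.eye(50))
draw = Mu + np.linalg.cholesky(Sigma) @ rng.normal(size=50)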