def entropy_gradient(z, method):
    K, dimZ = z.get_shape().as_list()
    # Epanechnikov kernel
    Kxy, h_square = Epanechnikov_kernel(z, K)
    dxkxy = -2 * K * (tf.reduce_mean(z, 0) - z) / dimZ
    lbd = 0.01

    # Stein's method
    if method == 'stein':
        K_ = Kxy + lbd * tf.diag(tf.ones(shape=(K, )))
        # test U-statistic (didn't work well?)
        #K_ = K_ - Kxy * tf.diag(tf.ones(shape=(K,)))
        entropy_grad = tf.matrix_solve(K_, dxkxy)
        entropy_loss = tf.reduce_mean(
            tf.stop_gradient(entropy_grad) * z)  # an estimate of the entropy gradient

    # plug-in estimates
    if method == 'kde':
        entropy_grad = dxkxy / (tf.reduce_sum(Kxy, 1, keep_dims=True) + lbd)
        entropy_loss = tf.reduce_mean(tf.stop_gradient(entropy_grad) * z)

    # score matching
    if method == 'score':
        z_ = tf.expand_dims(z, 1)
        z_mean = tf.reduce_mean(z, 0)
        T = tf.reduce_mean(z * z_, 2) + tf.reduce_mean(z**2) - tf.reduce_mean(
            (z + z_) * z_mean, 2)
        a = tf.matrix_solve(T + lbd * tf.diag(tf.ones(shape=(K, ))),
                            dimZ * tf.ones(shape=(K, 1))) * 0.5
        entropy_grad = -2.0 / dimZ * (tf.reduce_sum(a * z, 0)
                                      - tf.reduce_sum(a) * z)
        entropy_loss = tf.reduce_mean(tf.stop_gradient(entropy_grad) * z)

    return entropy_loss, Kxy, h_square
def compute_reward(self, m, s):
    '''
    Reward function, calculating mean and variance of rewards, given
    mean and variance of state distribution, along with the target State
    and a weight matrix.
    Input m : [1, k]
    Input s : [k, k]

    Output M : [1, 1]
    Output S : [1, 1]
    '''
    # TODO: Clean up this
    SW = s @ self.W

    iSpW = tf.transpose(
        tf.matrix_solve((tf.eye(self.state_dim, dtype=float_type) + SW),
                        tf.transpose(self.W), adjoint=True))

    muR = tf.exp(-(m - self.t) @ iSpW @ tf.transpose(m - self.t) / 2) / \
        tf.sqrt(tf.linalg.det(tf.eye(self.state_dim, dtype=float_type) + SW))

    i2SpW = tf.transpose(
        tf.matrix_solve((tf.eye(self.state_dim, dtype=float_type) + 2 * SW),
                        tf.transpose(self.W), adjoint=True))

    r2 = tf.exp(-(m - self.t) @ i2SpW @ tf.transpose(m - self.t)) / \
        tf.sqrt(tf.linalg.det(tf.eye(self.state_dim, dtype=float_type) + 2 * SW))

    sR = r2 - muR @ muR
    return muR, sR
def build_correction_term(self):
    # TODO
    Mb = tf.shape(self.Z)[0]
    Ma = self.M_old
    # jitter = settings.numerics.jitter_level
    jitter = 1e-4
    Saa = self.Su_old
    ma = self.mu_old
    obj = 0

    # a is old inducing points, b is new
    mu, Sigma = self.build_predict(self.Z_old, full_cov=True)
    Sigma = Sigma[:, :, 0]
    Smm = Sigma + tf.matmul(mu, tf.transpose(mu))
    Kaa = self.Kaa_old + np.eye(Ma) * jitter
    LSa = tf.cholesky(Saa)
    LKa = tf.cholesky(Kaa)
    obj += tf.reduce_sum(tf.log(tf.diag_part(LKa)))
    obj += -tf.reduce_sum(tf.log(tf.diag_part(LSa)))

    Sainv_ma = tf.matrix_solve(Saa, ma)
    obj += -0.5 * tf.reduce_sum(ma * Sainv_ma)
    obj += tf.reduce_sum(mu * Sainv_ma)

    Sainv_Smm = tf.matrix_solve(Saa, Smm)
    Kainv_Smm = tf.matrix_solve(Kaa, Smm)
    obj += -0.5 * tf.reduce_sum(
        tf.diag_part(Sainv_Smm) - tf.diag_part(Kainv_Smm))
    return obj
def test_MatrixSolve(self):
    t = tf.matrix_solve(*self.random((2, 3, 3, 3), (2, 3, 3, 1)),
                        adjoint=False)
    self.check(t)
    t = tf.matrix_solve(*self.random((2, 3, 3, 3), (2, 3, 3, 1)),
                        adjoint=True)
    self.check(t)
def sample_x_per_comp(eta1, eta2, nb_samples, seed=0):
    """
    Args:
        eta1: 1st Gaussian natural parameter, shape = N, K, L, 1
        eta2: 2nd Gaussian natural parameter, shape = N, K, L, L
        nb_samples: nb of samples to generate for each of the K components
        seed: random seed

    Returns:
        x ~ N(x|eta1[k], eta2[k]), nb_samples times for each of the K
        components.
    """
    with tf.name_scope('sample_x_k'):
        inv_sigma = -2 * eta2
        N, K, _, D = eta2.get_shape()

        # cholesky decomposition and adding noise (raw_noise is of dimension
        # (DxB), where B is the size of MC samples)
        L = tf.cholesky(inv_sigma)  # sample_shape = (D, nb_samples)
        sample_shape = (int(N), int(K), int(D), nb_samples)
        raw_noise = tf.random_normal(sample_shape, mean=0., stddev=1.,
                                     seed=seed)
        noise = tf.matrix_solve(tf.matrix_transpose(L), raw_noise)

        # reparam-trick-sampling: x_samps = mu_tilde + noise; shape = N, K, S, D
        x_k_samps = tf.transpose(tf.matrix_solve(inv_sigma, eta1) + noise,
                                 [0, 1, 3, 2], name='samples')
    return x_k_samps
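# Sanity-check sketch (added for illustration, not from the original source):
# the function above samples by solving L^T noise = eps, where L = chol(P)
# and P = -2 * eta2 is a precision matrix. Then cov(noise) = (L L^T)^{-1}
# = P^{-1}, which is exactly the desired covariance. A small numpy
# demonstration of the same trick:
import numpy as np

rng = np.random.RandomState(0)
P = np.array([[2.0, 0.5], [0.5, 1.0]])   # a toy precision matrix (-2 * eta2)
L = np.linalg.cholesky(P)
eps = rng.randn(2, 200000)               # standard normal raw noise
x = np.linalg.solve(L.T, eps)            # same role as tf.matrix_solve above
print(np.round(np.cov(x), 2))            # approximately inv(P)
print(np.round(np.linalg.inv(P), 2))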
def linear_regression():
    # To solve Ax + b = y
    A_val = np.linspace(0, 10, 100)
    b_val = np.repeat(1, 100)
    y_val = A_val + np.random.normal(0, 1, 100)

    A_col = np.transpose(np.matrix(A_val))
    b_col = np.transpose(np.matrix(b_val))
    A = np.column_stack((A_col, b_val))
    y = np.transpose(np.matrix(y_val))

    A_tensor = tf.constant(A)
    y_tensor = tf.constant(y)

    # To solve Ax = y:
    #   A'Ax = A'y
    # Use Cholesky decomposition, A'A = LL':
    #   LL'x = A'y
    # Solve L(sol1) = A'y
    # Solve L'x = sol1
    L = tf.cholesky(tf.matmul(tf.transpose(A_tensor), A_tensor))
    sol1 = tf.matrix_solve(L, tf.matmul(tf.transpose(A_tensor), y_tensor))
    sol2 = tf.matrix_solve(tf.transpose(L), sol1)

    with tf.Session() as sess:
        sol = sess.run(sol2)
        print(sol)
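# Quick verification sketch (added for illustration, not from the original
# source): the two triangular solves above implement the normal equations
# x = (A'A)^{-1} A'y, so the result should match numpy's least-squares
# solver on the same data.
import numpy as np

A_np = np.random.randn(100, 2)
y_np = np.random.randn(100, 1)
L_np = np.linalg.cholesky(A_np.T @ A_np)
s1 = np.linalg.solve(L_np, A_np.T @ y_np)        # L s1 = A'y
x_chol = np.linalg.solve(L_np.T, s1)             # L' x = s1
x_lstsq, *_ = np.linalg.lstsq(A_np, y_np, rcond=None)
print(np.allclose(x_chol, x_lstsq))              # expect True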
def testNotInvertible(self):
    # The input should be invertible.
    with self.test_session():
        with self.assertRaisesOpError("Input matrix is not invertible."):
            # All rows of the matrix below add to zero
            matrix = tf.constant([[1.0, 0.0, -1.0], [-1.0, 1.0, 0.0],
                                  [0.0, -1.0, 1.0]])
            tf.matrix_solve(matrix, matrix).eval()
def testWrongDimensions(self):
    # The matrix and right-hand sides should have the same number of rows.
    with self.test_session():
        matrix = tf.constant([[1., 0.], [0., 1.]])
        rhs = tf.constant([[1., 0.]])
        with self.assertRaises(ValueError):
            tf.matrix_solve(matrix, rhs)
def testNonSquareMatrix(self):
    # When the solve of a non-square matrix is attempted we should return
    # an error
    with self.test_session():
        with self.assertRaises(ValueError):
            matrix = tf.constant([[1., 2., 3.], [3., 4., 5.]])
            tf.matrix_solve(matrix, matrix)
def testNotInvertible(self):
    # The input should be invertible.
    with self.test_session():
        with self.assertRaisesOpError("Input matrix is not invertible."):
            # All rows of the matrix below add to zero
            matrix = tf.constant([[1., 0., -1.], [-1., 1., 0.],
                                  [0., -1., 1.]])
            tf.matrix_solve(matrix, matrix).eval()
def _no_cho(Kf=Kf, y=y, hp=hp, Kf_diff=Kf_diff):
    Kf = (Kf + tf.transpose(Kf, perm=[0, 2, 1])) / 2.
    e, v = tf.self_adjoint_eig(Kf)
    e = tf.where(e > 1e-14, e, 1e-14 * tf.ones_like(e))
    Kf = tf.matmul(tf.matmul(v, tf.matrix_diag(e), transpose_a=True), v)
    logdet = tf.reduce_sum(tf.where(e > 1e-14, tf.log(e), tf.zeros_like(e)),
                           axis=-1, name='logdet')
    # batch_size, n, 1
    alpha = tf.matrix_solve(Kf, tf.expand_dims(y, -1), name='solve_alpha')
    neg_log_mar_like = (tf.reduce_sum(y * tf.squeeze(alpha, axis=2), axis=1)
                        + logdet + n * np.log(2. * np.pi)) / 2.
    aa = tf.matmul(alpha, alpha, transpose_b=True)
    grad = {}
    for name in Kf_diff:  # tf.unstack(Kf_diff, axis=1):
        k_diff = Kf_diff[name]
        aaK = tf.matmul(aa, k_diff, name='aaK')
        KK = tf.matrix_solve(Kf, k_diff, name='KK')
        grad_ = (tf.trace(aaK) - tf.trace(KK)) / 2.
        grad_ = tf.where(tf.is_finite(grad_), grad_, tf.zeros_like(grad_))
        grad[name] = -grad_
    return neg_log_mar_like, grad
def predict_given_factorizations(self, m, s, iK, beta):
    """
    Approximate GP regression at noisy inputs via moment matching.
    IN: mean (m) (row vector) and (s) variance of the state
    OUT: mean (M) (row vector), variance (S) of the action
         and inv(s) times the input-output covariance
    """
    s = tf.tile(s[None, None, :, :],
                [self.num_outputs, self.num_outputs, 1, 1])
    inp = tf.tile(self.centralized_input(m)[None, :, :],
                  [self.num_outputs, 1, 1])

    # Calculate M and V: mean and inv(s) times input-output covariance
    iL = tf.matrix_diag(1 / self.lengthscales)
    iN = inp @ iL
    B = iL @ s[0, ...] @ iL + tf.eye(self.num_dims, dtype=float_type)

    # Redefine iN as in^T and t --> t^T
    # B is symmetric, so it's the same
    t = tf.linalg.transpose(
        tf.matrix_solve(B, tf.linalg.transpose(iN), adjoint=True),
    )

    lb = tf.exp(-tf.reduce_sum(iN * t, -1) / 2) * beta
    tiL = t @ iL
    c = self.variance / tf.sqrt(tf.linalg.det(B))

    M = (tf.reduce_sum(lb, -1) * c)[:, None]
    V = tf.matmul(tiL, lb[:, :, None], adjoint_a=True)[..., 0] * c[:, None]

    # Calculate S: Predictive Covariance
    R = s @ tf.matrix_diag(
        1 / tf.square(self.lengthscales[None, :, :]) +
        1 / tf.square(self.lengthscales[:, None, :])
    ) + tf.eye(self.num_dims, dtype=float_type)

    # TODO: change this block according to the PR of tensorflow. Maybe move
    # it into a function?
    X = inp[None, :, :, :] / tf.square(self.lengthscales[:, None, None, :])
    X2 = -inp[:, None, :, :] / tf.square(self.lengthscales[None, :, None, :])
    Q = tf.matrix_solve(R, s) / 2
    Xs = tf.reduce_sum(X @ Q * X, -1)
    X2s = tf.reduce_sum(X2 @ Q * X2, -1)
    maha = -2 * tf.matmul(X @ Q, X2, adjoint_b=True) + \
        Xs[:, :, :, None] + X2s[:, :, None, :]

    k = tf.log(self.variance)[:, None] - \
        tf.reduce_sum(tf.square(iN), -1) / 2
    L = tf.exp(k[:, None, :, None] + k[None, :, None, :] + maha)
    S = (tf.tile(beta[:, None, None, :], [1, self.num_outputs, 1, 1]) @ L @
         tf.tile(beta[None, :, :, None],
                 [self.num_outputs, 1, 1, 1]))[:, :, 0, 0]

    diagL = tf.transpose(tf.linalg.diag_part(tf.transpose(L)))
    S = S - tf.diag(tf.reduce_sum(tf.multiply(iK, diagL), [1, 2]))
    S = S / tf.sqrt(tf.linalg.det(R))
    S = S + tf.diag(self.variance)
    S = S - M @ tf.transpose(M)

    return tf.transpose(M), S, tf.transpose(V)
def testNonSquareMatrix(self):
    # When the solve of a non-square matrix is attempted we should return
    # an error
    with self.test_session():
        with self.assertRaises(ValueError):
            matrix = tf.constant([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]])
            tf.matrix_solve(matrix, matrix)
def testWrongDimensions(self):
    # The matrix and right-hand sides should have the same number of rows.
    with self.test_session():
        matrix = tf.constant([[1.0, 0.0], [0.0, 1.0]])
        rhs = tf.constant([[1.0, 0.0]])
        with self.assertRaises(ValueError):
            tf.matrix_solve(matrix, rhs)
def matrix_solve(matrix, rhs, adjoint=False, name=None):
    """Broadcasting batch matrix solve."""
    try:
        return tf.matrix_solve(matrix, rhs, adjoint=adjoint, name=name)
    except ValueError:
        matrix, rhs = tf_utils.broadcast_outer_dims((matrix, 2), (rhs, 2))
        return tf.matrix_solve(matrix, rhs, adjoint=adjoint, name=name)
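# Minimal sketch of the fallback path (assumption: this mirrors what the
# project's tf_utils.broadcast_outer_dims helper does; that helper is not
# shown in the source). tf.matrix_solve in TF1 requires identical batch
# dimensions, so the smaller operand is tiled up to the shared batch shape
# before solving:
def _manual_broadcast_solve(matrix, rhs):
    # matrix: [1, n, n], rhs: [b, n, k] -> tile matrix to [b, n, n]
    b = tf.shape(rhs)[0]
    matrix = tf.tile(matrix, [b, 1, 1])
    return tf.matrix_solve(matrix, rhs)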
def Bound2(phi_0, phi_1, phi_2, sigma_noise, K_mm, mean_y):
    # Preliminary bound
    beta = 1 / tf.square(sigma_noise)
    bound = 0
    N = h.get_dim(mean_y, 0)
    M = h.get_dim(K_mm, 0)
    W_inv_part = beta * phi_2 + K_mm
    global phi_200
    phi_200 = tf.matrix_solve(W_inv_part, tf.transpose(phi_1))
    W = beta * np.eye(N) - tf.square(beta) * h.Mul(
        phi_1, tf.matrix_solve(W_inv_part, tf.transpose(phi_1)))

    # Computations
    bound += N * tf.log(beta)
    bound += h.log_det(K_mm + 1e-3 * np.eye(M))
    bound -= h.Mul(tf.transpose(mean_y), W, mean_y)
    global matrix_determinant
    matrix_determinant = tf.ones(1)
    # h.log_det(W_inv_part + 1e2*np.eye(M))  # -1e-40*tf.exp(h.log_det(W_inv_part))
    bound -= h.log_det(W_inv_part + 1e-3 * tf.reduce_mean(W_inv_part) * np.eye(M))
    bound -= beta * phi_0
    bound += beta * tf.trace(tf.cholesky_solve(tf.cholesky(K_mm), phi_2))
    bound = bound * 0.5
    return bound
def pred(X, X_m_1, mu, len_sc_1, noise_1):
    Kmm = h.tf_SE_K(X_m_1, X_m_1, len_sc_1, noise_1)
    Knm = h.tf_SE_K(X, X_m_1, len_sc_1, noise_1)
    posterior_mean = h.Mul(Knm, tf.matrix_solve(Kmm, mu))
    K_nn = h.tf_SE_K(X, X, len_sc_1, noise_1)
    full_cov = K_nn - h.Mul(Knm, tf.matrix_solve(Kmm, tf.transpose(Knm)))
    posterior_cov = tf.diag_part(full_cov)
    return posterior_mean, tf.reshape(posterior_cov, [N, 1]), full_cov
def predict(K_mn, sigma, K_mm, K_nn):
    # predictions
    K_nm = tf.transpose(K_mn)
    Sig_Inv = 1e-1 * np.eye(M) + K_mm + K_mnnm_2 / tf.square(sigma)
    mu_post = h.Mul(tf.matrix_solve(Sig_Inv, K_mn), Ytr) / tf.square(sigma)
    mean = h.Mul(K_nm, mu_post)
    variance = K_nn - h.Mul(K_nm, h.safe_chol(K_mm, K_mn)) + \
        h.Mul(K_nm, tf.matrix_solve(Sig_Inv, K_mn))
    var_terms = 2 * tf.sqrt(
        tf.reshape(tf.diag_part(variance) + tf.square(sigma), [N, 1]))
    return mean, var_terms
def compute_log_z_given_y(eta1_phi1, eta2_phi1, eta1_phi2, eta2_phi2, pi_phi2,
                          name='log_q_z_given_y_phi'):
    """
    Args:
        eta1_phi1: encoder output; shape = N, K, L
        eta2_phi1: encoder output; shape = N, K, L, L
        eta1_phi2: GMM-EM parameter; shape = K, L
        eta2_phi2: GMM-EM parameter; shape = K, L, L
        name: tensorflow name scope

    Returns:
        log q(z|y, phi)
    """
    with tf.name_scope(name):
        N, L = eta1_phi1.get_shape().as_list()
        assert eta2_phi1.get_shape() == (N, L, L)
        K, L2 = eta1_phi2.get_shape().as_list()
        assert L2 == L
        assert eta2_phi2.get_shape() == (K, L, L)

        # combine eta2_phi1 and eta2_phi2
        eta2_phi_tilde = tf.add(tf.expand_dims(eta2_phi1, axis=1),
                                tf.expand_dims(eta2_phi2, axis=0))

        # w_eta2 = -0.5 * inv(sigma_phi1 + sigma_phi2)
        solved = tf.matrix_solve(
            eta2_phi_tilde,
            tf.tile(tf.expand_dims(eta2_phi2, axis=0), [N, 1, 1, 1]))
        w_eta2 = tf.einsum('nju,nkui->nkij', eta2_phi1, solved)

        # for numerical stability...
        w_eta2 = tf.divide(w_eta2 + tf.matrix_transpose(w_eta2), 2.,
                           name='symmetrised')

        # w_eta1 = inv(sigma_phi1 + sigma_phi2) * mu_phi2
        w_eta1 = tf.einsum(
            'nuj,nkuv->nkj', eta2_phi1,
            tf.matrix_solve(
                eta2_phi_tilde,
                tf.tile(tf.expand_dims(tf.expand_dims(eta1_phi2, axis=0),
                                       axis=-1),
                        [N, 1, 1, 1]))  # shape inside solve = N, K, D, 1
        )  # w_eta1.shape = N, K, D

        # compute means
        mu_phi1, _ = gaussian.natural_to_standard(eta1_phi1, eta2_phi1)

        # compute log_z_given_y_phi
        return gaussian.log_probability_nat(mu_phi1, w_eta1, w_eta2,
                                            pi_phi2)  # , (w_eta1, w_eta2)
def body(i, X, R_, R, V_):
    S = tf.matrix_solve(tf.matmul(tf.transpose(R_), R_),
                        tf.matmul(tf.transpose(R), R))
    V = R + tf.matmul(V_, S)
    T = tf.matrix_solve(tf.matmul(tf.transpose(V), tf.matmul(A_, V)),
                        tf.matmul(tf.transpose(R), R))
    X = X + tf.matmul(V, T)
    V_ = V
    R_ = R
    R = R - tf.matmul(A_, tf.matmul(V, T))
    return i + 1, X, R_, R, V_
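# Illustrative driver (added; `A_`, the right-hand side `b`, and the fixed
# iteration count are assumptions, not from the original source): `body`
# above is one step of an iterative Krylov-style solver for A_ X = b and can
# be run with tf.while_loop. Seeding R_ with the initial residual keeps the
# first matrix_solve well-defined.
def solve_iteratively(A_, b, num_iters=10):
    X0 = tf.zeros_like(b)
    R0 = b - tf.matmul(A_, X0)   # initial residual
    V0 = tf.zeros_like(b)
    cond = lambda i, X, R_, R, V_: i < num_iters
    _, X, _, _, _ = tf.while_loop(cond, body,
                                  [tf.constant(0), X0, R0, R0, V0])
    return X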
def build_likelihood(self):
    """
    Construct a tensorflow function to compute the bound on the marginal
    likelihood.
    """
    Mb = tf.shape(self.Z)[0]
    Ma = self.M_old
    jitter = settings.numerics.jitter_level
    # jitter = 1e-4
    sigma2 = self.likelihood.variance
    sigma = tf.sqrt(sigma2)
    N = self.num_data

    Saa = self.Su_old
    ma = self.mu_old

    # a is old inducing points, b is new
    # f is training points
    Kfdiag = self.kern.Kdiag(self.X)
    (Kbf, Kba, Kaa, Kaa_cur, La, Kbb, Lb, D, LD,
     Lbinv_Kba, LDinv_Lbinv_c, err, Qff) = self._build_common_terms()

    LSa = tf.cholesky(Saa)
    Lainv_ma = tf.matrix_triangular_solve(LSa, ma, lower=True)

    bound = 0
    # constant term
    bound = -0.5 * N * np.log(2 * np.pi)
    # quadratic term
    bound += -0.5 * tf.reduce_sum(tf.square(err)) / sigma2
    # bound += -0.5 * tf.reduce_sum(ma * Sainv_ma)
    bound += -0.5 * tf.reduce_sum(tf.square(Lainv_ma))
    bound += 0.5 * tf.reduce_sum(tf.square(LDinv_Lbinv_c))
    # log det term
    bound += -0.5 * N * tf.reduce_sum(tf.log(sigma2))
    bound += -tf.reduce_sum(tf.log(tf.diag_part(LD)))

    # delta 1: trace term
    bound += -0.5 * tf.reduce_sum(Kfdiag) / sigma2
    bound += 0.5 * tf.reduce_sum(tf.diag_part(Qff))

    # delta 2: a and b difference
    bound += tf.reduce_sum(tf.log(tf.diag_part(La)))
    bound += -tf.reduce_sum(tf.log(tf.diag_part(LSa)))

    Kaadiff = Kaa_cur - tf.matmul(tf.transpose(Lbinv_Kba), Lbinv_Kba)
    Sainv_Kaadiff = tf.matrix_solve(Saa, Kaadiff)
    Kainv_Kaadiff = tf.matrix_solve(Kaa, Kaadiff)

    bound += -0.5 * tf.reduce_sum(
        tf.diag_part(Sainv_Kaadiff) - tf.diag_part(Kainv_Kaadiff))

    return bound
def __init__(self, ind, y, U, m, B, lr):
    self.U = U
    self.m = m
    self.y = y.reshape([y.size, 1])
    self.ind = ind
    self.B = B
    self.learning_rate = lr
    self.nmod = len(self.U)
    self.tf_U = [tf.Variable(self.U[k], dtype=tf.float32)
                 for k in range(self.nmod)]
    # dim. of pseudo input
    self.d = 0
    for k in range(self.nmod):
        self.d = self.d + self.U[k].shape[1]
    # init mu, L, Z
    Zinit = self.init_pseudo_inputs()
    self.tf_Z = tf.Variable(Zinit, dtype=tf.float32)
    self.N = y.size
    # variational posterior
    self.tf_mu = tf.Variable(np.zeros([m, 1]), dtype=tf.float32)
    self.tf_L = tf.Variable(np.eye(m), dtype=tf.float32)
    # kernel parameters
    self.tf_log_lengthscale = tf.Variable(0.0, dtype=tf.float32)
    self.tf_log_tau = tf.Variable(0.0, dtype=tf.float32)

    # Stochastic variational ELBO:
    # a mini-batch of observed entry indices
    self.tf_sub = tf.placeholder(tf.int32, shape=[None, self.nmod])
    self.tf_y = tf.placeholder(tf.float32, shape=[None, 1])
    tf_inputs = tf.concat([
        tf.gather(self.tf_U[k], self.tf_sub[:, k]) for k in range(self.nmod)
    ], 1)
    print(tf_inputs.get_shape())
    Ltril = tf.matrix_band_part(self.tf_L, -1, 0)
    Kmm = self.kernel_matrix(self.tf_Z)
    Kmn = self.kernel_cross(self.tf_Z, tf_inputs)
    Knm = tf.transpose(Kmn)
    KnmKmmInv = tf.transpose(tf.matrix_solve(Kmm, Kmn))
    KnmKmmInvL = tf.matmul(KnmKmmInv, Ltril)
    tau = tf.exp(self.tf_log_tau)
    lengthscale = tf.exp(self.tf_log_lengthscale)
    hh_expt = tf.matmul(Ltril, tf.transpose(Ltril)) + \
        tf.matmul(self.tf_mu, tf.transpose(self.tf_mu))
    ELBO = -0.5 * tf.linalg.logdet(Kmm) \
        - 0.5 * tf.trace(tf.matrix_solve(Kmm, hh_expt)) \
        + 0.5 * tf.reduce_sum(tf.log(tf.pow(tf.diag_part(Ltril), 2))) \
        + 0.5 * self.N * self.tf_log_tau \
        - 0.5 * tau * self.N / self.B * tf.reduce_sum(
            tf.pow(self.tf_y - tf.matmul(KnmKmmInv, self.tf_mu), 2)) \
        - 0.5 * tau * (self.N * (1 + jitter)
                       - self.N / self.B * tf.reduce_sum(KnmKmmInv * Knm)
                       + self.N / self.B * tf.reduce_sum(tf.pow(KnmKmmInvL, 2))) \
        + 0.5 * self.m \
        - 0.5 * self.N * tf.log(2.0 * tf.constant(np.pi, dtype=tf.float32))
    # - 0.5*tf.reduce_sum(tf.pow(self.tf_U[0],2)) - 0.5*tf.reduce_sum(tf.pow(self.tf_U[1],2)) - 0.5*tf.reduce_sum(tf.pow(self.tf_U[2],2))
    # - 0.5*tf.pow(tau,2) - 0.5*tf.pow(lengthscale, 2)

    self.loss = -ELBO
    self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.minimizer = self.optimizer.minimize(self.loss)
    self.sess = tf.Session()
    self.sess.run(tf.global_variables_initializer())
def linRegrCholeskyDecomposition():
    A_tensor, b_tensor = generatingData()

    # Cholesky decomposition
    L = tf.cholesky(tf.matmul(tf.transpose(A_tensor), A_tensor))
    tA_b = tf.matmul(tf.transpose(A_tensor), b_tensor)
    sol1 = tf.matrix_solve(L, tA_b)
    sol2 = tf.matrix_solve(tf.transpose(L), sol1)

    solEval = sess.run(sol2)
    print("slope: ", solEval[0][0])
    print("intercept: ", solEval[1][0])
def cholesky():
    A, b = get_a_b()
    tA_A = tf.matmul(tf.transpose(A), A)
    L = tf.cholesky(tA_A)
    tA_b = tf.matmul(tf.transpose(A), b)
    sol1 = tf.matrix_solve(L, tA_b)  # solve the system of linear equations
    sol2 = tf.matrix_solve(tf.transpose(L), sol1)
    return sol2
def blk_chol_inv(A, B, b, lower=True, transpose=False):
    """
    Solve the equation Cx = b for x, where C is assumed to be a
    block-bi-diagonal matrix (where only the first (lower or upper)
    off-diagonal block is nonzero).

    Inputs:
    A - [Batch_size x T x n x n] tensor, where each A[:,i,:,:] is the ith
        block diagonal matrix
    B - [Batch_size x T-1 x n x n] tensor, where each B[:,i,:,:] is the ith
        (upper or lower) 1st block off-diagonal matrix
    b - [Batch_size x T x n x 1] tensor

    lower (default: True) - boolean specifying whether to treat B as the
        lower or upper 1st block off-diagonal of matrix C
    transpose (default: False) - boolean specifying whether to transpose the
        off-diagonal blocks B[:,i,:,:] (useful if you want to solve the
        problem C^T x = b with a representation of C)

    Outputs:
    x - solution of Cx = b
    """
    def _step(acc, inputs):
        x = acc
        A, B, b = inputs
        return tf.matrix_solve(A, b - tf.matmul(B, x))

    if transpose:
        A = tf.transpose(A, perm=[0, 1, 3, 2])
        B = tf.transpose(B, perm=[0, 1, 3, 2])
    if lower:
        x0 = tf.matrix_solve(A[:, 0], b[:, 0])
        X = tf.scan(_step, [
            tf.transpose(A[:, 1:], perm=[1, 0, 2, 3]),
            tf.transpose(B, perm=[1, 0, 2, 3]),
            tf.transpose(b[:, 1:], perm=[1, 0, 2, 3])
        ], initializer=x0)
        X = tf.transpose(X, perm=[1, 0, 2, 3])
        X = tf.concat([tf.expand_dims(x0, 1), X], 1)
    else:
        xN = tf.matrix_solve(A[:, -1], b[:, -1])
        X = tf.scan(_step, [
            tf.transpose(A[:, :-1], perm=[1, 0, 2, 3])[::-1],
            tf.transpose(B, perm=[1, 0, 2, 3])[::-1],
            tf.transpose(b[:, :-1], perm=[1, 0, 2, 3])[::-1]
        ], initializer=xN)
        X = tf.transpose(X, perm=[1, 0, 2, 3])
        X = tf.concat([tf.expand_dims(xN, 1), X], 1)[:, ::-1]
    return X
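# Usage sketch (added; the block sizes, random data, and the dense
# comparison are assumptions, not from the original source). With
# lower=True the routine runs the forward recursion
# x_t = A_t^{-1} (b_t - B_{t-1} x_{t-1}), which should agree with a dense
# tf.matrix_solve against the assembled block-bidiagonal matrix C:
import numpy as np

batch, T, n = 1, 3, 2
rng = np.random.RandomState(0)
A_np = rng.randn(batch, T, n, n) + 3 * np.eye(n)  # well-conditioned diagonals
B_np = 0.1 * rng.randn(batch, T - 1, n, n)
b_np = rng.randn(batch, T, n, 1)

# assemble the dense lower block-bidiagonal C
C = np.zeros((batch, T * n, T * n))
for t in range(T):
    C[:, t * n:(t + 1) * n, t * n:(t + 1) * n] = A_np[:, t]
    if t > 0:
        C[:, t * n:(t + 1) * n, (t - 1) * n:t * n] = B_np[:, t - 1]

x_blk = blk_chol_inv(tf.constant(A_np), tf.constant(B_np), tf.constant(b_np))
x_dense = tf.matrix_solve(tf.constant(C),
                          tf.constant(b_np.reshape(batch, T * n, 1)))
with tf.Session() as sess:
    xb, xd = sess.run([x_blk, x_dense])
    print(np.allclose(xb.reshape(batch, T * n, 1), xd))  # expect True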
def test_broadcast_apply_and_solve(self):
    # These cannot be done in the automated (base test class) tests since they
    # test shapes that tf.matmul cannot handle.
    # In particular, tf.matmul does not broadcast.
    with self.test_session() as sess:
        x = tf.random_normal(shape=(2, 2, 3, 4))

        # This LinearOperatorDiag will be broadcast to (2, 2, 3, 3) during
        # solve and apply with 'x' as the argument.
        diag = tf.random_uniform(shape=(2, 1, 3))
        operator = linalg.LinearOperatorDiag(diag)
        self.assertAllEqual((2, 1, 3, 3), operator.shape)

        # Create a batch matrix with the broadcast shape of operator.
        diag_broadcast = tf.concat(1, (diag, diag))
        mat = tf.matrix_diag(diag_broadcast)
        self.assertAllEqual((2, 2, 3, 3), mat.get_shape())  # being pedantic.

        operator_apply = operator.apply(x)
        mat_apply = tf.matmul(mat, x)
        self.assertAllEqual(operator_apply.get_shape(), mat_apply.get_shape())
        self.assertAllClose(*sess.run([operator_apply, mat_apply]))

        operator_solve = operator.solve(x)
        mat_solve = tf.matrix_solve(mat, x)
        self.assertAllEqual(operator_solve.get_shape(), mat_solve.get_shape())
        self.assertAllClose(*sess.run([operator_solve, mat_solve]))
def _verifySolve(self, x, y, batch_dims=None):
    for adjoint in False, True:
        for np_type in [np.float32, np.float64]:
            a = x.astype(np_type)
            b = y.astype(np_type)
            if adjoint:
                a_np = np.conj(np.transpose(a))
            else:
                a_np = a
            if batch_dims is not None:
                a = np.tile(a, batch_dims + [1, 1])
                a_np = np.tile(a_np, batch_dims + [1, 1])
                b = np.tile(b, batch_dims + [1, 1])

            np_ans = np.linalg.solve(a_np, b)
            with self.test_session():
                # Test the batch version, which works for ndim >= 2
                tf_ans = tf.batch_matrix_solve(a, b, adjoint=adjoint)
                out = tf_ans.eval()
                self.assertEqual(tf_ans.get_shape(), out.shape)
                self.assertEqual(np_ans.shape, out.shape)
                self.assertAllClose(np_ans, out)

                if a.ndim == 2:
                    # Test the simple version
                    tf_ans = tf.matrix_solve(a, b, adjoint=adjoint)
                    out = tf_ans.eval()
                    self.assertEqual(out.shape, tf_ans.get_shape())
                    self.assertEqual(np_ans.shape, out.shape)
                    self.assertAllClose(np_ans, out)
def log_probability_nat_per_samp(x_samps, eta1, eta2):
    """
    Args:
        x_samps: matrix of shape (minibatch_size, nb_components, nb_samps,
            latent_dims)
        eta1: 1st natural parameter for Gaussian distr; shape:
            (size_minibatch, nb_components, latent_dim)
        eta2: 2nd natural parameter for Gaussian distr; shape:
            (size_minibatch, nb_components, latent_dim, latent_dim)

    Returns:
        1/S sum^S_{s=1} log N(x^(s)|eta1, eta2) of shape (N, K, S)
    """
    # same as above, but x consists of S samples for K components:
    # x.shape = (N, K, S, D)
    # todo: merge with above function (above is the same but normalised)
    N, K, S, D = x_samps.get_shape().as_list()
    assert eta1.get_shape() == (N, K, D)
    assert eta2.get_shape() == (N, K, D, D)

    with tf.name_scope('log_prob_4d'):
        # -1/2 (sigma^(-1) * x) * x + sigma^(-1) * mu * x
        log_normal = tf.einsum('nksd,nksd->nks',
                               tf.einsum('nkij,nksj->nksi', eta2, x_samps),
                               x_samps)
        log_normal += tf.einsum('nki,nksi->nks', eta1, x_samps)

        # 1/4 (-2 * sigma * (sigma^(-1) * mu)) sigma^(-1) * mu
        #   = -1/2 mu sigma^(-1) mu; shape = N, K, 1
        log_normal += 1.0 / 4 * tf.einsum(
            'nkdi,nkd->nki',
            tf.matrix_solve(eta2, tf.expand_dims(eta1, axis=-1)), eta1)
        log_normal -= D / 2. * tf.constant(np.log(2 * np.pi),
                                           dtype=tf.float32, name='log2pi')

        # + 1/2 log |sigma^(-1)|
        log_normal += 1.0 / 2 * tf.expand_dims(
            logdet(-2.0 * eta2 + 1e-20 * tf.eye(D)), axis=2)

    return log_normal
def logZ(self, nat_params):
    """
    Compute log partition function from natparams
    shape:
        - nat_params: [batch,K,N,N+1]
    """
    # logdet
    [K, N] = nat_params.get_shape().as_list()[1:3]
    idty = 2.0 * pi * tf.eye(N, batch_shape=[K])
    # cste_term = tf.log(tf.reduce_prod(tf.matrix_diag_part(idty), axis=-1))  # shape: [n_mixtures,]
    cste_term = tf.log(tf.matrix_determinant(idty))  # shape: [n_mixtures,]
    logdet = tf.expand_dims(
        -tf.log(tf.matrix_determinant(-2 * nat_params[:, :, :, 1:]))
        + cste_term, axis=-1)  # shape: [batch,n_mixtures,1]
    # logdet = tf.expand_dims(-tf.log(tf.reduce_prod(tf.matrix_diag_part(-2*nat_params[:,:,:,1:]), axis=2, keep_dims=False)) + cste_term, axis=-1)  # shape: [batch,n_mixtures,1]

    # Quadratic term
    mu = tf.matrix_solve(
        -2 * nat_params[:, :, :, 1:],
        tf.expand_dims(nat_params[:, :, :, 0], axis=-1))  # shape: [batch,n_mixtures,dim,1]
    musigmu = tf.squeeze(
        tf.matmul(tf.transpose(mu, perm=[0, 1, 3, 2]),
                  tf.expand_dims(nat_params[:, :, :, 0], axis=-1)),
        axis=-1)  # shape: [batch,n_mixtures,1]

    return tf.scalar_mul(0.5, tf.add(musigmu, logdet))
def __init__(self, pars, vars, eqns):
    self.pars = pars
    self.vars = vars
    self.eqns = eqns

    # size
    self.par_sz = {par: int(par.get_shape()[0]) for par in pars}
    self.var_sz = {var: int(var.get_shape()[0]) for var in vars}
    self.eqn_sz = {eqn: int(eqn.get_shape()[0]) for eqn in eqns}

    # equation system
    self.parvec = tf.concat(pars, 0)
    self.varvec = tf.concat(vars, 0)
    self.eqnvec = tf.concat(eqns, 0)
    self.error = tf.reduce_max(tf.abs(self.eqnvec))

    # gradients
    self.parjac = tf.concat([tf.concat([jacobian(eqn, x) for x in pars], 1)
                             for eqn in eqns], 0)
    self.varjac = tf.concat([tf.concat([jacobian(eqn, x) for x in vars], 1)
                             for eqn in eqns], 0)

    # newton steps
    self.newton_step = -tf.squeeze(
        tf.matrix_solve(self.varjac, tf.expand_dims(self.eqnvec, 1)))
    self.newton_dvars = tf.split(self.newton_step,
                                 list(self.var_sz.values()), 0)
    self.newton_update = [tf.assign(v, v + s)
                          for v, s in zip(self.vars, self.newton_dvars)]

    # target param
    self.tpars = [tf.zeros_like(p) for p in pars]
    self.tparvec = tf.concat(self.tpars, 0)
def init_train_updates(self):
    training_outputs = self.network.training_outputs
    last_error = self.variables.last_error
    error_func = self.variables.loss
    mu = self.variables.mu

    new_mu = tf.where(
        tf.less(last_error, error_func),
        mu * self.mu_update_factor,
        mu / self.mu_update_factor,
    )

    err_for_each_sample = flatten((self.target - training_outputs) ** 2)

    variables = self.network.variables
    params = [var for var in variables.values() if var.trainable]
    param_vector = make_single_vector(params)

    J = compute_jacobian(err_for_each_sample, params)
    J_T = tf.transpose(J)
    n_params = J.shape[1]

    parameter_update = tf.matrix_solve(
        tf.matmul(J_T, J) + new_mu * tf.eye(n_params.value),
        tf.matmul(J_T, tf.expand_dims(err_for_each_sample, 1)))
    updated_params = param_vector - flatten(parameter_update)

    updates = [(mu, new_mu)]
    parameter_updates = setup_parameter_updates(params, updated_params)
    updates.extend(parameter_updates)

    return updates
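# The solve above is the Levenberg-Marquardt step: it computes dw from
# (J^T J + mu * I) dw = J^T e and then sets w <- w - dw. A bare numpy
# rendering of the same step (the names J, e, mu here are illustrative,
# added for this sketch, not from the original class):
import numpy as np

def lm_step(J, e, mu):
    n_params = J.shape[1]
    lhs = J.T @ J + mu * np.eye(n_params)  # damped Gauss-Newton matrix
    rhs = J.T @ e[:, None]                 # gradient-like right-hand side
    return np.linalg.solve(lhs, rhs).ravel()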
def _verifySolve(self, x, y, batch_dims=None):
    for adjoint in False, True:
        for np_type in [np.float32, np.float64, np.complex64, np.complex128]:
            # note: the original used `np_type is [np.float32, np.float64]`,
            # which is always False; `in` restores the intended real branch
            if np_type in (np.float32, np.float64):
                a = x.real.astype(np_type)
                b = y.real.astype(np_type)
            else:
                a = x.astype(np_type)
                b = y.astype(np_type)
            if adjoint:
                a_np = np.conj(np.transpose(a))
            else:
                a_np = a
            if batch_dims is not None:
                a = np.tile(a, batch_dims + [1, 1])
                a_np = np.tile(a_np, batch_dims + [1, 1])
                b = np.tile(b, batch_dims + [1, 1])

            np_ans = np.linalg.solve(a_np, b)
            with self.test_session():
                tf_ans = tf.matrix_solve(a, b, adjoint=adjoint)
                out = tf_ans.eval()
                self.assertEqual(tf_ans.get_shape(), out.shape)
                self.assertEqual(np_ans.shape, out.shape)
                self.assertAllClose(np_ans, out)
def __init__(self, pars, vars, eqns):
    self.pars = pars
    self.vars = vars
    self.eqns = eqns

    # size
    self.par_sz = {par: int(par.get_shape()[0]) for par in pars}
    self.var_sz = {var: int(var.get_shape()[0]) for var in vars}
    self.eqn_sz = {eqn: int(eqn.get_shape()[0]) for eqn in eqns}

    # equation system
    self.parvec = tf.concat(pars, 0)
    self.varvec = tf.concat(vars, 0)
    self.eqnvec = tf.concat(eqns, 0)
    self.error = tf.reduce_max(tf.abs(self.eqnvec))

    # gradients
    self.parjac = tf.concat([tf.concat([jacobian(eqn, x) for x in pars], 1)
                             for eqn in eqns], 0)
    self.varjac = tf.concat([tf.concat([jacobian(eqn, x) for x in vars], 1)
                             for eqn in eqns], 0)

    # newton steps
    self.newton_step = -tf.squeeze(
        tf.matrix_solve(self.varjac, tf.expand_dims(self.eqnvec, 1)))
    self.newton_dvars = tf.split(self.newton_step,
                                 list(self.var_sz.values()), 0)
    self.newton_update = [tf.assign(v, v + s)
                          for v, s in zip(self.vars, self.newton_dvars)]

    # homotopy
    self.tv = tf.placeholder(dtype=tf.float64)
    self.par0 = [tf.Variable(np.zeros(p.shape)) for p in pars]
    self.par1 = [tf.Variable(np.zeros(p.shape)) for p in pars]

    # path gen
    self.path_assign = tf.group(*[
        p.assign((1 - self.tv) * p0 + self.tv * p1)
        for p, p0, p1 in zip(pars, self.par0, self.par1)
    ])
def init_train_updates(self):
    network_output = self.variables.network_output
    prediction_func = self.variables.train_prediction_func
    last_error = self.variables.last_error
    error_func = self.variables.error_func
    mu = self.variables.mu

    new_mu = tf.where(
        tf.less(last_error, error_func),
        mu * self.mu_update_factor,
        mu / self.mu_update_factor,
    )

    err_for_each_sample = flatten((network_output - prediction_func) ** 2)

    params = parameter_values(self.connection)
    param_vector = make_single_vector(params)

    J = compute_jacobian(err_for_each_sample, params)
    J_T = tf.transpose(J)
    n_params = J.shape[1]

    parameter_update = tf.matrix_solve(
        tf.matmul(J_T, J) + new_mu * tf.eye(n_params.value),
        tf.matmul(J_T, tf.expand_dims(err_for_each_sample, 1))
    )
    updated_params = param_vector - flatten(parameter_update)

    updates = [(mu, new_mu)]
    parameter_updates = setup_parameter_updates(params, updated_params)
    updates.extend(parameter_updates)

    return updates
def testMultiplyInverseDense(self):
    with tf.Graph().as_default(), self.test_session() as sess:
        tf.set_random_seed(200)
        block = fb.EmbeddingKFACMultiIndepFB(lc.LayerCollection())
        inputs = [tf.constant([[0., 1], [1, 2], [2, 3]]),
                  tf.constant([[0.1], [0.], [0.]])]
        outputs = [tf.constant([[0.], [1.], [2.]]),
                   tf.constant([[0., 0], [0, 0], [0, 4]])]
        block.register_additional_tower(inputs, outputs,
                                        transpose=[False, True])
        grads = [output ** 2 for output in outputs]
        damping = tf.constant(0.)
        block.instantiate_factors(((grads,),), damping)
        block._input_factor.instantiate_cov_variables()
        block._output_factor.instantiate_cov_variables()
        block.register_inverse()
        block._input_factor.instantiate_inv_variables()
        block._output_factor.instantiate_inv_variables()

        # Create a dense update.
        dense_vector = tf.constant([[0.5], [0.5]])

        # Compare Fisher-vector product against explicit result.
        result = block.multiply_inverse(dense_vector)
        expected_result = tf.matrix_solve(block.full_fisher_block(),
                                          dense_vector)

        sess.run(tf.global_variables_initializer())
        self.assertAlmostEqual(sess.run(expected_result[0]),
                               sess.run(result[0]))
        self.assertAlmostEqual(sess.run(expected_result[1]),
                               sess.run(result[1]))
def sample_posterior_mean(X_new, X, f_sample, ls, kernel_func=rbf,
                          ridge_factor=1e-3):
    """Sample posterior mean for f^*.

    The posterior for f_new is conditionally independent from other
    parameters in the model, so its conditional posterior mean can be
    obtained by sampling from the posterior conditional f^* | f. In
    particular, we care about the posterior predictive mean, i.e.

        E(f^*|f) = K(X^*, X) K(X, X)^{-1} f

    Args:
        X_new: (np.ndarray of float) testing locations, N_new x D
        X: (np.ndarray of float) training locations, N x D
        f_sample: (np.ndarray of float) M samples of posterior GP sample,
            N x M
        ls: (float) training lengthscale
        kernel_func: (function) kernel function.
        ridge_factor: (float32) small ridge factor to stabilize Cholesky
            decomposition.

    Returns:
        (np.ndarray) N_new x M vectors of posterior predictive mean samples
    """
    Kx = kernel_func(X, X_new, ls=ls)
    # add ridge factor to stabilize inversion.
    K = kernel_func(X, ls=ls, ridge_factor=ridge_factor)
    K_inv_f = tf.matrix_solve(K, f_sample)
    return tf.matmul(Kx, K_inv_f, transpose_a=True)
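# Usage sketch (added; the shapes and random data are assumptions based only
# on the docstring above; `rbf` is the function's own kernel default):
import numpy as np

X_train = np.random.randn(50, 1).astype(np.float32)
X_test = np.linspace(-2., 2., 20).reshape(-1, 1).astype(np.float32)
f_draws = np.random.randn(50, 10).astype(np.float32)  # 10 posterior draws of f

f_new_mean = sample_posterior_mean(X_test, X_train, f_draws, ls=1.0)
with tf.Session() as sess:
    print(sess.run(f_new_mean).shape)  # expected: (20, 10)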
def test_broadcast_apply_and_solve(self):
    # These cannot be done in the automated (base test class) tests since they
    # test shapes that tf.matmul cannot handle.
    # In particular, tf.matmul does not broadcast.
    with self.test_session() as sess:
        x = tf.random_normal(shape=(2, 2, 3, 4))

        # This LinearOperatorDiag will be broadcast to (2, 2, 3, 3) during
        # solve and apply with 'x' as the argument.
        diag = tf.random_uniform(shape=(2, 1, 3))
        operator = linalg.LinearOperatorDiag(diag, is_self_adjoint=True)
        self.assertAllEqual((2, 1, 3, 3), operator.shape)

        # Create a batch matrix with the broadcast shape of operator.
        diag_broadcast = tf.concat(1, (diag, diag))
        mat = tf.matrix_diag(diag_broadcast)
        self.assertAllEqual((2, 2, 3, 3), mat.get_shape())  # being pedantic.

        operator_apply = operator.apply(x)
        mat_apply = tf.matmul(mat, x)
        self.assertAllEqual(operator_apply.get_shape(), mat_apply.get_shape())
        self.assertAllClose(*sess.run([operator_apply, mat_apply]))

        operator_solve = operator.solve(x)
        mat_solve = tf.matrix_solve(mat, x)
        self.assertAllEqual(operator_solve.get_shape(), mat_solve.get_shape())
        self.assertAllClose(*sess.run([operator_solve, mat_solve]))
def testBatchResultSize(self):
    # 3x3x3 matrices, 3x3x1 right-hand sides.
    matrix = np.array([1., 2., 3., 4., 5., 6., 7., 8., 9.] * 3).reshape(3, 3, 3)
    rhs = np.array([1., 2., 3.] * 3).reshape(3, 3, 1)
    answer = tf.matrix_solve(matrix, rhs)
    ls_answer = tf.matrix_solve_ls(matrix, rhs)
    self.assertEqual(ls_answer.get_shape(), [3, 3, 1])
    self.assertEqual(answer.get_shape(), [3, 3, 1])
def predict2():
    # predictions
    cov = h.Mul(K_mm_2,
                tf.matrix_inverse(K_mm_2 + K_mnnm_2 / tf.square(sigma_2)),
                K_mm_2)
    cov_chol = tf.cholesky(cov)
    mu = h.Mul(K_mm_2, tf.cholesky_solve(cov_chol, K_mn_2), Ytr) / tf.square(sigma_2)
    mean = h.Mul(K_nm_2, tf.matrix_solve(K_mm_1, mu))
    variance = K_nn_2 - h.Mul(K_nm_2, h.safe_chol(K_mm_2, tf.transpose(K_nm_2)))
    var_terms = 2 * tf.sqrt(
        tf.reshape(tf.diag_part(variance) + tf.square(sigma_2), [N, 1]))
    return mean, var_terms
def _logp(self, result, prior_mean, prior_cov,
          transition_mat, transition_mean, transition_cov,
          observation_mat=None, observation_mean=None, observation_cov=None):
    # define the Kalman filtering calculation within the TF graph
    if observation_mean is not None:
        observation_mean = tf.reshape(observation_mean, (self.K, 1))

    transition_mean = tf.reshape(transition_mean, (self.D, 1))

    pred_mean = tf.reshape(prior_mean, (self.D, 1))
    pred_cov = prior_cov

    filtered_means = []
    filtered_covs = []
    step_logps = []
    observations = tf.unpack(result)
    for t in range(self.T):
        obs_t = tf.reshape(observations[t], (self.K, 1))

        if observation_mat is not None:
            tmp = tf.matmul(observation_mat, pred_cov)
            S = tf.matmul(tmp, tf.transpose(observation_mat)) + observation_cov
            # TODO optimize this to not use an explicit matrix inverse
            # Sinv = tf.matrix_inverse(S)
            # gain = tf.matmul(pred_cov, tf.matmul(tf.transpose(observation_mat), Sinv))

            # todo worth implementing cholsolve explicitly?
            gain = tf.matmul(pred_cov,
                             tf.transpose(tf.matrix_solve(S, observation_mat)))

            y = obs_t - tf.matmul(observation_mat, pred_mean) - observation_mean
            updated_mean = pred_mean + tf.matmul(gain, y)
            updated_cov = pred_cov - tf.matmul(gain, tmp)
        else:
            updated_mean = obs_t
            updated_cov = tf.zeros_like(pred_cov)
            S = pred_cov
            y = obs_t - pred_mean

        step_logp = bf.dists.multivariate_gaussian_log_density(y, 0, S)
        filtered_means.append(updated_mean)
        filtered_covs.append(updated_cov)
        step_logps.append(step_logp)

        if t < self.T - 1:
            pred_mean = tf.matmul(transition_mat, updated_mean) + transition_mean
            pred_cov = tf.matmul(
                transition_mat,
                tf.matmul(updated_cov, tf.transpose(transition_mat))) + transition_cov

    self.filtered_means = filtered_means
    self.filtered_covs = filtered_covs
    self.step_logps = tf.pack(step_logps)
    logp = tf.reduce_sum(self.step_logps)
    return logp
def testSolve(self):
    with self.test_session():
        for batch_shape in [(), (2, 3)]:
            for k in [1, 4]:
                operator, mat = self._build_operator_and_mat(batch_shape, k)

                # Work with 5 simultaneous systems. 5 is arbitrary.
                x = self._rng.randn(*(batch_shape + (k, 5)))

                self._compare_results(
                    expected=tf.matrix_solve(mat, x).eval(),
                    actual=operator.solve(x))
def test_solve_dynamic(self):
    with self.test_session() as sess:
        for shape in self._shapes_to_test:
            for dtype in self._dtypes_to_test:
                operator, mat, feed_dict = self._operator_and_mat_and_feed_dict(
                    shape, dtype, use_placeholder=True)
                rhs = self._make_rhs(operator)
                op_solve_v, mat_solve_v = sess.run(
                    [operator.solve(rhs), tf.matrix_solve(mat, rhs)],
                    feed_dict=feed_dict)
                self.assertAllClose(op_solve_v, mat_solve_v)
def _verifySolve(self, x, y):
    for np_type in [np.float32, np.float64]:
        a = x.astype(np_type)
        b = y.astype(np_type)
        with self.test_session():
            if a.ndim == 2:
                tf_ans = tf.matrix_solve(a, b)
            else:
                tf_ans = tf.batch_matrix_solve(a, b)
            out = tf_ans.eval()
        np_ans = np.linalg.solve(a, b)
        self.assertEqual(np_ans.shape, out.shape)
        self.assertAllClose(np_ans, out)
def test_solve(self):
    with self.test_session() as sess:
        for shape in self._shapes_to_test:
            for dtype in self._dtypes_to_test:
                operator, mat, _ = self._operator_and_mat_and_feed_dict(
                    shape, dtype, use_placeholder=False)
                for adjoint in [False, True]:
                    if adjoint and operator.is_self_adjoint:
                        continue
                    rhs = self._make_rhs(operator)
                    op_solve = operator.solve(rhs, adjoint=adjoint)
                    mat_solve = tf.matrix_solve(
                        self._maybe_adjoint(mat, adjoint), rhs)
                    self.assertAllEqual(op_solve.get_shape(),
                                        mat_solve.get_shape())
                    op_solve_v, mat_solve_v = sess.run([op_solve, mat_solve])
                    self.assertAllClose(op_solve_v, mat_solve_v)
def test_sqrt_solve(self):
    # Square roots are not unique, but we should still have
    # S^{-T} S^{-1} x = A^{-1} x.
    # In our case, we should have S = S^T, so then S^{-1} S^{-1} x = A^{-1} x.
    with self.test_session():
        for batch_shape in [(), (2, 3,)]:
            for k in [1, 4]:
                operator, mat = self._build_operator_and_mat(batch_shape, k)

                # Work with 5 simultaneous systems. 5 is arbitrary.
                x = self._rng.randn(*(batch_shape + (k, 5)))

                self._compare_results(
                    expected=tf.matrix_solve(mat, x).eval(),
                    actual=operator.sqrt_solve(operator.sqrt_solve(x)))
def test_solve(self):
    self._maybe_skip("solve")
    with self.test_session() as sess:
        for use_placeholder in False, True:
            for shape in self._shapes_to_test:
                for dtype in self._dtypes_to_test:
                    for adjoint in False, True:
                        operator, mat, feed_dict = self._operator_and_mat_and_feed_dict(
                            shape, dtype, use_placeholder=use_placeholder)
                        rhs = self._make_rhs(operator, adjoint=adjoint)
                        op_solve = operator.solve(rhs, adjoint=adjoint)
                        mat_solve = tf.matrix_solve(mat, rhs, adjoint=adjoint)
                        if not use_placeholder:
                            self.assertAllEqual(op_solve.get_shape(),
                                                mat_solve.get_shape())
                        op_solve_v, mat_solve_v = sess.run(
                            [op_solve, mat_solve], feed_dict=feed_dict)
                        self.assertAC(op_solve_v, mat_solve_v)
def constrained_gradient_descent(obj, con, var, step=0.1):
    # shape info
    var_shp = [x.get_shape() for x in var]

    # derivatives
    g = flatify(con)
    F = jacobian(obj, var)
    G = jacobian(g, var)

    # constrained gradient descent
    L = tf.matrix_solve(tf.matmul(T(G), G), -tf.matmul(T(G), F))
    Ugd = step * squeeze(F + tf.matmul(G, L))

    # correction step (zangwill-garcia)
    # can be non-square so use least squares
    Ugz = squeeze(tf.matrix_solve_ls(
        tf.concat([T(G), Ugd[None, :]], 0),
        -tf.concat([g[:, None], [[0.0]]], 0),
        fast=False))

    # updates
    gd_diffs = unpack(Ugd, var_shp)
    gz_diffs = unpack(Ugz, var_shp)

    # operators
    gd_upds = increment(var, gd_diffs)
    gz_upds = increment(var, gz_diffs)

    # slope
    gain = tf.squeeze(tf.matmul(Ugd[None, :], F))

    return gd_upds, gz_upds, gain
def _process_input_helper(self, update_row_factors, sp_input=None,
                          transpose_input=False):
    """Creates the graph for processing a sparse slice of input.

    Args:
      update_row_factors: if True, update the row_factors, else update the
        column factors.
      sp_input: Please refer to comments for update_row_factors and
        update_col_factors.
      transpose_input: If true, the input is logically transposed and then the
        corresponding rows/columns of the transposed input are updated.

    Returns:
      A tuple consisting of the following two elements:
      new_values: New values for the row/column factors.
      update_op: An op that assigns the newly computed values to the
        row/column factors.
    """
    assert isinstance(sp_input, tf.SparseTensor)

    if update_row_factors:
        left = self._row_factors
        right_factors = self._col_factors_cache
        row_wt = self._row_wt_cache
        col_wt = self._col_wt_cache
        sharding_func = WALSModel._get_sharding_func(self._input_rows,
                                                     self._num_row_shards)
        gramian = self._col_gramian_cache
    else:
        left = self._col_factors
        right_factors = self._row_factors_cache
        row_wt = self._col_wt_cache
        col_wt = self._row_wt_cache
        sharding_func = WALSModel._get_sharding_func(self._input_cols,
                                                     self._num_col_shards)
        gramian = self._row_gramian_cache
        transpose_input = not transpose_input

    # Note that the row indices of sp_input are based on the original full
    # input. Here we reindex the rows and give them contiguous ids starting
    # at 0. We use tf.unique to achieve this reindexing. Note that this is
    # done so that the downstream kernel can assume that the input is "dense"
    # along the row dimension.
    row_ids, col_ids = tf.split(1, 2, sp_input.indices)
    update_row_indices, all_row_ids = tf.unique(row_ids[:, 0])
    update_col_indices, all_col_ids = tf.unique(col_ids[:, 0])
    col_ids = tf.expand_dims(tf.cast(all_col_ids, tf.int64), 1)
    row_ids = tf.expand_dims(tf.cast(all_row_ids, tf.int64), 1)

    if transpose_input:
        update_indices = update_col_indices
        row_shape = [tf.cast(tf.shape(update_row_indices)[0], tf.int64)]
        gather_indices = update_row_indices
    else:
        update_indices = update_row_indices
        row_shape = [tf.cast(tf.shape(update_col_indices)[0], tf.int64)]
        gather_indices = update_col_indices

    num_rows = tf.cast(tf.shape(update_indices)[0], tf.int64)
    col_shape = [num_rows]
    right = embedding_ops.embedding_lookup(right_factors, gather_indices,
                                           partition_strategy='div')
    new_sp_indices = tf.concat(1, [row_ids, col_ids])
    new_sp_shape = (tf.concat(0, [row_shape, col_shape]) if transpose_input
                    else tf.concat(0, [col_shape, row_shape]))
    new_sp_input = tf.SparseTensor(indices=new_sp_indices,
                                   values=sp_input.values,
                                   shape=new_sp_shape)

    # Compute lhs and rhs of the normal equations
    total_lhs = (self._unobserved_weight * gramian)
    if self._regularization is not None:
        total_lhs += self._regularization

    if self._row_weights is None:
        # Special case of ALS. Use a much simpler update rule.
        total_rhs = (self._unobserved_weight *
                     tf.sparse_tensor_dense_matmul(new_sp_input, right,
                                                   adjoint_a=transpose_input))
        # TODO(rmlarsen): handle transposing in tf.matrix_solve instead of
        # transposing explicitly.
        # TODO(rmlarsen): multi-thread tf.matrix_solve.
        new_left_values = tf.transpose(
            tf.matrix_solve(total_lhs, tf.transpose(total_rhs)))
    else:
        # TODO(yifanchen): Add special handling for single shard without using
        # embedding_lookup and perform benchmarks for those cases.
        row_weights_slice = embedding_ops.embedding_lookup(
            row_wt, update_indices, partition_strategy='div')
        col_weights = embedding_ops.embedding_lookup(
            col_wt, gather_indices, partition_strategy='div')
        partial_lhs, total_rhs = wals_compute_partial_lhs_and_rhs(
            right, col_weights, self._unobserved_weight, row_weights_slice,
            new_sp_input.indices, new_sp_input.values, num_rows,
            transpose_input, name="wals_compute_partial_lhs_rhs")
        total_lhs = tf.expand_dims(total_lhs, 0) + partial_lhs
        total_rhs = tf.expand_dims(total_rhs, -1)
        new_left_values = tf.squeeze(
            tf.matrix_solve(total_lhs, total_rhs), [2])

    return (new_left_values,
            self.scatter_update(left, update_indices, new_left_values,
                                sharding_func))
import tensorflow as tf

sess = tf.InteractiveSession()

x = tf.constant([[2, 5, 3, -5],
                 [0, 3, -2, 5],
                 [4, 3, 5, 3],
                 [6, 1, 4, 0]])
y = tf.constant([[4, -7, 4, -3, 4],
                 [6, 4, -7, 4, 7],
                 [2, 3, 2, 1, 4],
                 [1, 5, 5, 5, 2]])
floatx = tf.constant([[2., 5., 3., -5.],
                      [0., 3., -2., 5.],
                      [4., 3., 5., 3.],
                      [6., 1., 4., 0.]])

tf.transpose(x).eval()                                 # Transpose matrix
tf.matmul(x, y).eval()                                 # Matrix multiplication
tf.matrix_determinant(floatx).eval()                   # Matrix determinant
tf.matrix_inverse(floatx).eval()                       # Matrix inverse
tf.matrix_solve(floatx, [[1], [1], [1], [1]]).eval()   # Solve matrix system
def _solve(self, rhs):
    return tf.matrix_solve(self._pos_def_matrix, rhs)
Tr_Knn, K_nm_2, K_mnnm_2 = KS.build_psi_stats_rbf(X_m_2, tf.square(noise_2),
                                                  len_sc_2, h_mu, h_S)
K_mm_2 = h.tf_SE_K(X_m_2, X_m_2, len_sc_2, noise_2)
K_nn_2 = h.tf_SE_K(h_mu, h_mu, len_sc_2, noise_2)
K_mn_2 = tf.transpose(K_nm_2)
'''
opti_mu1=
opti_Sig1=
opti_mu2=
opti_sig2=
'''
a_mn = tf.matrix_solve(K_mm_1, K_mn_1)
a_nm = tf.transpose(a_mn)
sig_inv = tf.matrix_inverse(K_mm_1) + h.Mul(a_mn, a_nm) / tf.square(sigma_1)
mu1 = h.Mul(tf.matrix_inverse(sig_inv), a_mn, Ytr) / tf.square(sigma_1)

F_v_1 = GPLVM.Bound1(h_mu, h_S, K_mm_1, K_nm_1, Tr_Knn_1, sigma_1)
s.run(tf.initialize_all_variables())
F_v_2 = GPLVM.Bound2(Tr_Knn, K_nm_2, K_mnnm_2, sigma_2, K_mm_2, Ytr)

mean, var_terms = predict(K_mn_2, sigma_2, K_mm_2, K_nn_2)
mean = tf.reshape(mean, [-1])
var_terms = tf.reshape(var_terms, [-1])

global_step = tf.Variable(0, trainable=False)
def test_MatrixSolve(self):
    t = tf.matrix_solve(*self.random((3, 3), (3, 1)))
    self.check(t)
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.framework import ops
ops.reset_default_graph()

sess = tf.Session()

x_vals = np.linspace(0, 10, 100)
y_vals = x_vals + np.random.normal(0, 1, 100)
x_vals_column = np.transpose(np.matrix(x_vals))
ones_column = np.transpose(np.matrix(np.repeat(1, 100)))
A = np.column_stack((x_vals_column, ones_column))
b = np.transpose(np.matrix(y_vals))
A_tensor = tf.constant(A)
b_tensor = tf.constant(b)

# Solve the normal equations A'Ax = A'b via Cholesky: A'A = LL'
tA_A = tf.matmul(tf.transpose(A_tensor), A_tensor)
L = tf.cholesky(tA_A)
tA_b = tf.matmul(tf.transpose(A_tensor), b_tensor)
sol1 = tf.matrix_solve(L, tA_b)
sol2 = tf.matrix_solve(tf.transpose(L), sol1)

solution_eval = sess.run(sol2)
slope = solution_eval[0][0]
y_intercept = solution_eval[1][0]
print('slope: ' + str(slope))
print('y_intercept: ' + str(y_intercept))

best_fit = []
for i in x_vals:
    best_fit.append(slope * i + y_intercept)

plt.plot(x_vals, y_vals, 'o', label='Data')
plt.plot(x_vals, best_fit, 'r-', label='Best fit line')
plt.legend(loc='upper left')
plt.show()
def mean_cov(K_mm, sigma, K_mn, K_mnnm, Y):
    beta = 1 / tf.square(sigma)
    A_I = beta * K_mnnm + K_mm
    Sig = h.Mul(K_mm, tf.matrix_solve(A_I, K_mm))
    mu = beta * h.Mul(K_mm, tf.matrix_solve(A_I, K_mn), Y)
    return mu, Sig
def predict_new1(self):
    # self.TrKnn, self.Knm, self.Kmnnm = self.psi()
    Kmm = self.Kern.K(self.pseudo, self.pseudo)
    return tf.matmul(self.Knm,
                     tf.matrix_solve(Kmm + h.tol * np.eye(self.m), self.psMu))