def test_cholesky_grad_indef():
    scipy = pytest.importorskip("scipy")
    x = tensor.matrix()
    matrix = np.array([[1, 0.2], [0.2, -2]]).astype(config.floatX)
    cholesky = Cholesky(lower=True, on_error="raise")
    chol_f = function([x], grad(cholesky(x).sum(), [x]))
    with pytest.raises(scipy.linalg.LinAlgError):
        chol_f(matrix)
    cholesky = Cholesky(lower=True, on_error="nan")
    chol_f = function([x], grad(cholesky(x).sum(), [x]))
    assert np.all(np.isnan(chol_f(matrix)))
def solve(self, X, flux, cho_C, mu, LInv):
    """
    Compute the maximum a posteriori (MAP) prediction for the
    spherical harmonic coefficients of a map given a flux timeseries.

    Args:
        X (matrix): The flux design matrix.
        flux (array): The flux timeseries.
        cho_C (scalar/vector/matrix): The lower Cholesky factorization
            of the data covariance.
        mu (array): The prior mean of the spherical harmonic coefficients.
        LInv (scalar/vector/matrix): The inverse prior covariance of the
            spherical harmonic coefficients.

    Returns:
        The vector of spherical harmonic coefficients corresponding to the
        MAP solution and the Cholesky factorization of the corresponding
        covariance matrix.

    """
    # TODO: These if statements won't play well with @autocompile!!!

    # Compute C^-1 . X
    if cho_C.ndim == 0:
        CInvX = X / cho_C ** 2
    elif cho_C.ndim == 1:
        CInvX = tt.dot(tt.diag(1 / cho_C ** 2), X)
    else:
        CInvX = _cho_solve(cho_C, X)

    # Compute W = X^T . C^-1 . X + L^-1
    W = tt.dot(tt.transpose(X), CInvX)
    if LInv.ndim == 0 or LInv.ndim == 1:
        # Scalar or per-coefficient prior: add L^-1 to the diagonal
        # (the original had two identical branches here)
        W = tt.inc_subtensor(
            W[tuple((tt.arange(W.shape[0]), tt.arange(W.shape[0])))], LInv
        )
        LInvmu = mu * LInv
    else:
        W += LInv
        LInvmu = tt.dot(LInv, mu)

    # Compute the MAP coefficients and their covariance matrix
    cho_W = sla.cholesky(W)
    M = _cho_solve(cho_W, tt.transpose(CInvX))
    yhat = tt.dot(M, flux) + _cho_solve(cho_W, LInvmu)
    ycov = _cho_solve(cho_W, tt.eye(cho_W.shape[0]))
    cho_ycov = sla.cholesky(ycov)
    return yhat, cho_ycov
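# --- Hedged numpy cross-check (not part of the original source) ---
# For dense covariances, `solve` above computes
#   yhat = W^-1 (X^T C^-1 flux + L^-1 mu),  W = X^T C^-1 X + L^-1,
# via Cholesky factorizations. A minimal dense-numpy equivalent for
# validating small cases; the helper name `map_solve_np` is illustrative:
import numpy as np
from scipy.linalg import cho_factor, cho_solve

def map_solve_np(X, flux, C, mu, LInv):
    """Dense-matrix sketch of the Cholesky-based MAP solve above."""
    CInvX = cho_solve(cho_factor(C, lower=True), X)
    W = X.T @ CInvX + LInv
    cho_W = cho_factor(W, lower=True)
    yhat = cho_solve(cho_W, CInvX.T @ flux + LInv @ mu)
    ycov = cho_solve(cho_W, np.eye(W.shape[0]))
    return yhat, np.linalg.cholesky(ycov)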
def test_cholesky_indef():
    if not imported_scipy:
        raise SkipTest("Scipy needed for the Cholesky op.")
    x = tensor.matrix()
    matrix = np.array([[1, 0.2], [0.2, -2]]).astype(config.floatX)
    cholesky = Cholesky(lower=True, on_error='raise')
    chol_f = function([x], cholesky(x))
    with assert_raises(scipy.linalg.LinAlgError):
        chol_f(matrix)
    cholesky = Cholesky(lower=True, on_error='nan')
    chol_f = function([x], cholesky(x))
    assert np.all(np.isnan(chol_f(matrix)))
def _indvdl_gauss(
        hparams, std_x, n_samples, L_cov, Normal, Deterministic, floatX,
        cholesky, tt, verbose):
    scale1 = np.float32(std_x[0] * hparams['v_indvdl_1'])
    scale2 = np.float32(std_x[1] * hparams['v_indvdl_2'])

    u1s = Normal(
        'u1s', mu=np.float32(0.), tau=np.float32(1.), shape=(n_samples,),
        dtype=floatX
    )
    u2s = Normal(
        'u2s', mu=np.float32(0.), tau=np.float32(1.), shape=(n_samples,),
        dtype=floatX
    )

    L_cov_ = cholesky(L_cov).astype(floatX)
    # set_subtensor returns a new variable; the result must be kept
    # (the original discarded it, leaving L_cov_ unscaled)
    L_cov_ = tt.set_subtensor(L_cov_[0, :], L_cov_[0, :] * scale1)
    L_cov_ = tt.set_subtensor(L_cov_[1, :], L_cov_[1, :] * scale2)
    # Use the scaled Cholesky factor L_cov_ (the original referenced the
    # unscaled covariance L_cov here)
    mu1s_ = Deterministic('mu1s_', L_cov_[0, 0] * u1s + L_cov_[0, 1] * u2s)
    mu2s_ = Deterministic('mu2s_', L_cov_[1, 0] * u1s + L_cov_[1, 1] * u2s)

    if 10 <= verbose:
        print('Normal for individual effect')
        print('u1s.dtype = {}'.format(u1s.dtype))
        print('u2s.dtype = {}'.format(u2s.dtype))

    return mu1s_, mu2s_
def _get_updates(self):
    n = self.params['batch_size']
    N = self.params['train_size']
    prec_lik = self.params['prec_lik']
    prec_prior = self.params['prec_prior']
    gc_norm = self.params['gc_norm']
    gamma = float(n + N) / n

    # compute log-likelihood
    error = self.model_outputs - self.true_outputs
    logliks = log_normal(error, prec_lik)
    sumloglik = logliks.sum()

    # compute gradient of likelihood wrt each data point
    grads = tensor.jacobian(expression=logliks, wrt=self.weights)
    grads = tensor.concatenate([g.flatten(ndim=2) for g in grads], axis=1)
    avg_grads = grads.mean(axis=0)
    dist_grads = grads - avg_grads

    # compute variance of gradient
    var_grads = (1. / (n - 1)) * tensor.dot(dist_grads.T, dist_grads)

    logprior = log_prior_normal(self.weights, prec_prior)
    grads_prior = tensor.grad(cost=logprior, wrt=self.weights)
    grads_prior = tensor.concatenate([g.flatten() for g in grads_prior])

    # update Fisher information
    I_t_next = (1 - 1 / self.it) * self.I_t + 1 / self.it * var_grads

    # compute noise
    if 'B' in self.params:
        B = self.params['B']
    else:
        B = gamma * I_t_next * N

    # B += np.eye(self.n_weights) * (10 ** -9)
    B_ch = slinalg.cholesky(B)
    noise = tensor.dot(((2. / tensor.sqrt(self.lr)) * B_ch),
                       trng.normal((self.n_weights, 1)))

    # expensive inversion
    inv_cond_mat = gamma * N * I_t_next + (4. / self.lr) * B
    cond_mat = nlinalg.matrix_inverse(inv_cond_mat)

    updates = []
    updates.append((self.I_t, I_t_next))
    updates.append((self.it, self.it + 1))

    # update the parameters
    updated_params = 2 * tensor.dot(
        cond_mat, grads_prior + N * avg_grads + noise.flatten())
    updated_params = updated_params.flatten()
    last_row = 0
    for p in self.weights:
        sub_index = np.prod(p.get_value().shape)
        up = updated_params[last_row:last_row + sub_index]
        up = up.reshape(p.shape)
        updates.append((p, up))
        last_row += sub_index

    return updates, sumloglik
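# --- Hedged note (not from the original source) ---
# The update above appears to implement Stochastic Gradient Fisher
# Scoring (Ahn, Korattikara & Welling, 2012): I_t is a running estimate
# of the Fisher information, the preconditioner is
#   (gamma * N * I_t + (4 / lr) * B)^-1,
# and the injected noise (2 / sqrt(lr)) * chol(B) @ z with z ~ N(0, I)
# has covariance (4 / lr) * B, matching the B_ch term constructed above.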
def blk_tridiag_chol(A, B):
    '''
    Compute the Cholesky decomposition of a symmetric, positive definite
    block-tridiagonal matrix.

    Inputs:
    A - [T x n x n] tensor, where each A[i,:,:] is the ith block diagonal matrix
    B - [T-1 x n x n] tensor, where each B[i,:,:] is the ith (upper) 1st block
        off-diagonal matrix

    Outputs:
    R - python list with two elements
        * R[0] - [T x n x n] tensor of block diagonal elements of Cholesky decomposition
        * R[1] - [T-1 x n x n] tensor of (lower) 1st block off-diagonal elements of Cholesky
    '''
    # Code for computing the Cholesky decomposition of a symmetric block
    # tridiagonal matrix
    def compute_chol(Aip1, Bi, Li, Ci):
        Ci = T.dot(Bi.T, Tla.matrix_inverse(Li).T)
        Dii = Aip1 - T.dot(Ci, Ci.T)
        Lii = Tsla.cholesky(Dii)
        return [Lii, Ci]

    L1 = Tsla.cholesky(A[0])
    C1 = T.zeros_like(B[0])

    # this scan returns the diagonal and off-diagonal blocks of the
    # Cholesky decomposition
    mat, updates = theano.scan(fn=compute_chol, sequences=[A[1:], B],
                               outputs_info=[L1, C1])

    mat[0] = T.concatenate([T.shape_padleft(L1), mat[0]])
    return mat
def blk_tridag_chol(A, B):
    '''
    Compute the Cholesky decomposition of a symmetric, positive definite
    block-tridiagonal matrix.

    Inputs:
    A - [T x n x n] tensor, where each A[i,:,:] is the ith block diagonal matrix
    B - [T-1 x n x n] tensor, where each B[i,:,:] is the ith (upper) 1st block
        off-diagonal matrix

    Outputs:
    R - python list with two elements
        * R[0] - [T x n x n] tensor of block diagonal elements of Cholesky decomposition
        * R[1] - [T-1 x n x n] tensor of (lower) 1st block off-diagonal elements of Cholesky
    '''
    # Code for computing the Cholesky decomposition of a symmetric block
    # tridiagonal matrix
    def compute_chol(Aip1, Bi, Li, Ci):
        Ci = T.dot(Bi.T, Tla.matrix_inverse(Li).T)
        Dii = Aip1 - T.dot(Ci, Ci.T)
        Lii = Tsla.cholesky(Dii)
        return [Lii, Ci]

    L1 = Tsla.cholesky(A[0])
    C1 = T.zeros_like(B[0])

    # this scan returns the diagonal and off-diagonal blocks of the
    # Cholesky decomposition
    mat, updates = theano.scan(fn=compute_chol, sequences=[A[1:], B],
                               outputs_info=[L1, C1])

    mat[0] = T.concatenate([T.shape_padleft(L1), mat[0]])
    return mat
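# --- Hedged numpy cross-check (not part of the original source) ---
# To sanity-check the block-tridiagonal Cholesky above, assemble the
# dense matrix from the (A, B) blocks and compare its dense Cholesky
# factor with the per-block outputs. The helper name `dense_from_blocks`
# is illustrative:
import numpy as np
import scipy.linalg

def dense_from_blocks(A, B):
    """Dense matrix with diagonal blocks A[i], upper off-diagonal
    blocks B[i], and lower off-diagonal blocks B[i].T."""
    nblk, n = A.shape[0], A.shape[1]
    M = np.zeros((nblk * n, nblk * n))
    for i in range(nblk):
        M[i*n:(i+1)*n, i*n:(i+1)*n] = A[i]
        if i < nblk - 1:
            M[i*n:(i+1)*n, (i+1)*n:(i+2)*n] = B[i]
            M[(i+1)*n:(i+2)*n, i*n:(i+1)*n] = B[i].T

    return M

# L_dense = scipy.linalg.cholesky(dense_from_blocks(A, B), lower=True)
# Then L_dense's ith diagonal block should match R[0][i], and its
# (i+1, i) sub-diagonal block should match R[1][i].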
def test_gpu_cholesky_opt(self):
    if not imported_scipy:
        self.skipTest('SciPy is not enabled, skipping test')
    A = theano.tensor.matrix("A", dtype="float64")
    fn = theano.function([A], cholesky(A), mode=mode_with_gpu)
    assert any(isinstance(node.op, GpuCholesky)
               for node in fn.maker.fgraph.toposort())
def _indvdl_t(hparams, std_x, n_samples, L_cov, verbose=0):
    df_L = hparams.df_indvdl
    dist_scale_indvdl = hparams.dist_scale_indvdl

    scale1 = std_x[0] * _dist_from_str('scale_mu1s', dist_scale_indvdl)
    scale2 = std_x[1] * _dist_from_str('scale_mu2s', dist_scale_indvdl)
    scale1 = scale1 / np.sqrt(df_L / (df_L - 2))
    scale2 = scale2 / np.sqrt(df_L / (df_L - 2))

    u1s = StudentT('u1s', nu=np.float32(df_L), shape=(n_samples,),
                   dtype=floatX)
    u2s = StudentT('u2s', nu=np.float32(df_L), shape=(n_samples,),
                   dtype=floatX)

    L_cov_ = cholesky(L_cov).astype(floatX)
    # L_cov_ is lower triangular, so L_cov_[0, 1] == 0 and the cross term
    # in mu1s_ vanishes (the original indexed L_cov_[1, 0] here, which its
    # own "# [1, 0] is ... 0?" comment already flagged as suspect).
    mu1s_ = Deterministic(
        'mu1s_', L_cov_[0, 0] * u1s * scale1 + L_cov_[0, 1] * u2s * scale1)
    mu2s_ = Deterministic(
        'mu2s_', L_cov_[1, 0] * u1s * scale2 + L_cov_[1, 1] * u2s * scale2)

    if 10 <= verbose:
        print('StudentT for individual effect')
        print('u1s.dtype = {}'.format(u1s.dtype))
        print('u2s.dtype = {}'.format(u2s.dtype))

    return mu1s_, mu2s_
def _indvdl_t(hparams, n_samples, L_cov, verbose=0):
    df_L = hparams.df_indvdl
    dist_scale_indvdl = hparams.dist_scale_indvdl

    scale1 = _dist_from_str('scale_mu1s', dist_scale_indvdl)
    scale2 = _dist_from_str('scale_mu2s', dist_scale_indvdl)
    scale1 = scale1 / np.sqrt(df_L / (df_L - 2))
    scale2 = scale2 / np.sqrt(df_L / (df_L - 2))

    u1s = pm.StudentT('u1s', nu=np.float32(df_L), shape=(n_samples,))
    u2s = pm.StudentT('u2s', nu=np.float32(df_L), shape=(n_samples,))

    L_cov_ = cholesky(L_cov)
    # L_cov_ is lower triangular, so L_cov_[0, 1] == 0 and the cross term
    # vanishes (the original indexed L_cov_[1, 0] here, a likely typo,
    # while its comment correctly noted that L_cov_[0, 1] == 0)
    mu1s_ = pm.Deterministic(
        'mu1s_', L_cov_[0, 0] * u1s * scale1 + L_cov_[0, 1] * u2s * scale1)
    mu2s_ = pm.Deterministic(
        'mu2s_', L_cov_[1, 0] * u1s * scale2 + L_cov_[1, 1] * u2s * scale2)

    if 10 <= verbose:
        print('StudentT for individual effect')
        print('u1s.dtype = {}'.format(u1s.dtype))
        print('u2s.dtype = {}'.format(u2s.dtype))

    return mu1s_, mu2s_
def test_gpu_cholesky_opt(self):
    A = theano.tensor.matrix("A", dtype="float64")
    fn = theano.function([A], cholesky(A), mode=mode_with_gpu)
    assert any(isinstance(node.op, GpuCholesky)
               for node in fn.maker.fgraph.toposort())
def return_output(self, Dif):
    # Dif is a theano.tensor.matrix
    Frac = Dif / self.gamma
    Cov = self.v0 * T.pow(Frac, self.alpha)
    L = sin.cholesky(T.exp(-Cov))
    eps = self.srng.normal(avg=0, std=0.001, size=(self.time, self.lsize))
    return T.dot(L, eps)
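# --- Hedged note (not from the original source) ---
# The covariance here is a powered-exponential kernel evaluated on the
# pairwise-distance matrix Dif:
#   k(d) = exp(-v0 * (d / gamma)**alpha),
# so L is its Cholesky factor and T.dot(L, eps) is a correlated Gaussian
# draw with small (std=0.001) marginal scale.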
def test_cholesky_grad_indef():
    x = theano.tensor.matrix()
    matrix = np.array([[1, 0.2], [0.2, -2]]).astype(config.floatX)
    cholesky = GpuCholesky(lower=True)
    chol_f = theano.function([x], theano.tensor.grad(cholesky(x).sum(), [x]))
    with pytest.raises(LinAlgError):
        chol_f(matrix)
def psd_solve_with_chol(node):
    if node.op == solve:
        A, b = node.inputs  # result is solution Ax=b
        if is_psd(A):
            L = cholesky(A)  # assume lower triangular factor
            x = solve_cholesky(L, b)
            return [x]
def test_cholesky_grad_indef():
    x = theano.tensor.matrix()
    matrix = np.array([[1, 0.2], [0.2, -2]]).astype(config.floatX)
    cholesky = GpuCholesky(lower=True)
    chol_f = theano.function([x], theano.tensor.grad(cholesky(x).sum(), [x]))
    with assert_raises(LinAlgError):
        chol_f(matrix)
def _indvdl_t(
        hparams, std_x, n_samples, L_cov, StudentT, Deterministic, floatX,
        cholesky, tt, verbose):
    df_L = hparams['df_indvdl']
    scale1 = np.float32(
        std_x[0] * hparams['v_indvdl_1'] / np.sqrt(df_L / (df_L - 2)))
    scale2 = np.float32(
        std_x[1] * hparams['v_indvdl_2'] / np.sqrt(df_L / (df_L - 2)))

    u1s = StudentT('u1s', nu=np.float32(df_L), shape=(n_samples,),
                   dtype=floatX)
    u2s = StudentT('u2s', nu=np.float32(df_L), shape=(n_samples,),
                   dtype=floatX)

    L_cov_ = cholesky(L_cov).astype(floatX)
    # set_subtensor returns a new variable; the result must be kept
    # (the original discarded it, so the scaling had no effect)
    L_cov_ = tt.set_subtensor(L_cov_[0, :], L_cov_[0, :] * scale1)
    L_cov_ = tt.set_subtensor(L_cov_[1, :], L_cov_[1, :] * scale2)
    mu1s_ = Deterministic('mu1s_', L_cov_[0, 0] * u1s + L_cov_[0, 1] * u2s)
    mu2s_ = Deterministic('mu2s_', L_cov_[1, 0] * u1s + L_cov_[1, 1] * u2s)

    if 10 <= verbose:
        print('StudentT for individual effect')
        print('u1s.dtype = {}'.format(u1s.dtype))
        print('u2s.dtype = {}'.format(u2s.dtype))

    return mu1s_, mu2s_
def nlml(Y, hyp, X, X_sp, EyeM):
    # TODO allow for different pseudo inputs for each dimension
    # initialise the (before compilation) kernel function
    hyps = [hyp[:idims+1], hyp[idims+1]]
    kernel_func = partial(cov.Sum, hyps, self.covs)

    sf2 = hyp[idims]**2
    sn2 = hyp[idims+1]**2
    N = X.shape[0].astype(theano.config.floatX)

    ridge = 1e-6
    Kmm = kernel_func(X_sp) + ridge*EyeM
    Kmn = kernel_func(X_sp, X)
    Lmm = cholesky(Kmm)
    rhs = tt.concatenate([EyeM, Kmn], axis=1)
    sol = solve_lower_triangular(Lmm, rhs)
    iKmm = solve_upper_triangular(Lmm.T, sol[:, :EyeM.shape[0]])
    Lmn = sol[:, EyeM.shape[0]:]
    diagQnn = (Lmn**2).sum(0)

    # Gamma = diag(Knn - Qnn) + sn2*I
    Gamma = sf2 + sn2 - diagQnn
    Gamma_inv = 1.0/Gamma

    # these operations are done to avoid inverting Qnn + Gamma
    sqrtGamma_inv = tt.sqrt(Gamma_inv)
    Lmn_ = Lmn*sqrtGamma_inv    # Kmn_*Gamma^-.5
    Yi = Y*sqrtGamma_inv        # Gamma^-.5*Y
    # I + Lmn * Gamma^-1 * Lnm
    Bmm = tt.eye(Kmm.shape[0]) + (Lmn_).dot(Lmn_.T)
    Amm = cholesky(Bmm)
    LAmm = Lmm.dot(Amm)

    Kmn_dotYi = Kmn.dot(Yi*sqrtGamma_inv)
    rhs = tt.concatenate([EyeM, Kmn_dotYi[:, None]], axis=1)
    sol = solve_upper_triangular(
        LAmm.T, solve_lower_triangular(LAmm, rhs))
    iBmm = sol[:, :-1]
    beta_sp = sol[:, -1]

    log_det_K_sp = tt.sum(tt.log(Gamma))
    log_det_K_sp += 2*tt.sum(tt.log(tt.diag(Amm)))

    loss_sp = Yi.dot(Yi) - Kmn_dotYi.dot(beta_sp)
    loss_sp += log_det_K_sp + N*np.log(2*np.pi)
    loss_sp *= 0.5

    return loss_sp, iKmm, Lmm, Amm, iBmm, beta_sp
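# --- Hedged note (not from the original source) ---
# The algebra above follows the standard FITC / sparse-GP identities:
#   Qnn   = Knm Kmm^-1 Kmn           (diagQnn is its diagonal, via Lmn)
#   Gamma = diag(Knn - Qnn) + sn2*I  (here Knn has constant diagonal sf2)
#   log|Qnn + Gamma| = log|Gamma| + 2*sum(log(diag(Amm)))
# where Bmm = I + Lmm^-1 Kmn Gamma^-1 Knm Lmm^-T = Amm Amm^T, which is
# exactly the log_det_K_sp term accumulated before the final 0.5 factor.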
def nlml(A, phidotY, EyeM):
    Lmm = cholesky(A)
    rhs = tt.concatenate([EyeM, phidotY[:, None]], axis=1)
    sol = solve_upper_triangular(
        Lmm.T, solve_lower_triangular(Lmm, rhs))
    iA = sol[:, :-1]
    beta_ss = sol[:, -1]
    return iA, Lmm, beta_ss
def test_gpu_cholesky_opt(self):
    A = theano.tensor.matrix("A", dtype="float32")
    fn = theano.function([A], cholesky(A),
                         mode=mode_with_gpu.excluding("cusolver"))
    assert any(isinstance(node.op, GpuMagmaCholesky)
               for node in fn.maker.fgraph.toposort())
def logprob(x, m, S):
    delta = x - m
    L = cholesky(S)
    beta = solve_lower_triangular(L, delta.T).T
    lp = -0.5 * tt.square(beta).sum(-1)
    lp -= tt.sum(tt.log(tt.diagonal(L)))
    lp -= (0.5 * m.size * tt.log(2 * np.pi)).astype(theano.config.floatX)
    return lp
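# --- Hedged numpy cross-check (not part of the original source) ---
# `logprob` is the multivariate-normal log-density evaluated through a
# lower Cholesky factor. An equivalent numpy version for validation;
# the helper name `logprob_np` is illustrative:
import numpy as np
from scipy.stats import multivariate_normal

def logprob_np(x, m, S):
    L = np.linalg.cholesky(S)
    beta = np.linalg.solve(L, (x - m).T).T
    return (-0.5 * np.square(beta).sum(-1)
            - np.log(np.diag(L)).sum()
            - 0.5 * m.size * np.log(2 * np.pi))

# For a batch of rows x, logprob_np(x, m, S) should agree with
# multivariate_normal(mean=m, cov=S).logpdf(x).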
def __init__(self, tau2_0=0.1, sigma2_0=0.1, l_0=0.1, eta=0.1, debug=1):
    """
    :type tau2_0: float
    :param tau2_0: starting value for the noise variance.

    :type sigma2_0: float
    :param sigma2_0: starting value for the signal variance.

    :type l_0: float
    :param l_0: starting value for the length scale.

    :type eta: float
    :param eta: learning rate.

    :type debug: int
    :param debug: verbosity.
    """
    if debug > 0:
        print("GP Initing...")

    ##################################################
    # Prepare the -loglik gradient descent

    # Init the shared vars
    X = T.dmatrix('X')
    f = T.dmatrix('f')
    self.tau2 = theano.shared(tau2_0)
    self.l = theano.shared(l_0)
    self.sigma2 = theano.shared(sigma2_0)

    # Make the covar matrix
    K = self.covFunc(X, X, self.l)

    # Get a numerically safe decomp
    L = LA.cholesky(K + self.tau2 * T.identity_like(K))

    # Calculate the weights for each of the training data;
    # predictions are a weighted sum.
    alpha = LA.solve(T.transpose(L), LA.solve(L, f))

    # Calculate the negative log marginal likelihood
    nloglik = -T.reshape(
        -0.5 * T.dot(T.transpose(f), alpha) - T.sum(T.log(T.diag(L))), [])

    # Get gradients
    grads = [
        T.grad(nloglik, self.tau2),
        T.grad(nloglik, self.l),
        T.grad(nloglik, self.sigma2)
    ]

    # Updates; make sure to keep the params positive
    updates = [
        (var, T.max([var - eta * grad, 0.1]))
        for var, grad in zip([self.tau2, self.l, self.sigma2], grads)
    ]

    self._gd = theano.function(inputs=[X, f], updates=updates)

    if debug > 0:
        print("Done")
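# --- Hedged note (not from the original source) ---
# Up to the constant 0.5 * n * log(2 * pi), which is irrelevant for
# gradient descent, nloglik above is the usual GP negative log marginal
# likelihood 0.5 * f^T K_y^-1 f + sum(log(diag(L))), where
# K_y = K + tau2 * I = L L^T and alpha = K_y^-1 f is obtained from the
# two triangular solves.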
def psd_solve_with_chol(node):
    if node.op == solve:
        A, b = node.inputs  # result is solution Ax=b
        if is_psd(A):
            L = cholesky(A)
            # N.B. this can be further reduced to a yet-unwritten cho_solve
            # Op __if__ no other Op makes use of the L matrix during the
            # stabilization
            Li_b = Solve('lower_triangular')(L, b)
            x = Solve('upper_triangular')(L.T, Li_b)
            return [x]
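# --- Hedged numpy illustration (not part of the original source) ---
# What the graph rewrite above buys: for PSD A, solve(A, b) reduces to
# two triangular solves against the Cholesky factor. The same steps in
# numpy/scipy:
import numpy as np
from scipy.linalg import solve_triangular

rng = np.random.default_rng(0)
M = rng.normal(size=(5, 5))
A = M @ M.T + 5 * np.eye(5)   # PSD by construction
b = rng.normal(size=5)

L = np.linalg.cholesky(A)
x = solve_triangular(L.T, solve_triangular(L, b, lower=True), lower=False)
assert np.allclose(x, np.linalg.solve(A, b))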
def exact_proj_cholesky(x, x_test, gp_params, indep_noise, batch_size):
    Ktt = cov_mat(x_test, x_test, gp_params)
    Kxt = cov_mat(x, x_test, gp_params)
    Kxx = cov_mat(x, x, gp_params)
    Kxx = Kxx + indep_noise * T.identity_like(Kxx)

    KxtT_Kxxinv = Kxt.T.dot(T.nlinalg.matrix_inverse(Kxx))
    K = Ktt - KxtT_Kxxinv.dot(Kxt)
    K = K + 1e-10 * T.identity_like(K)

    R = cholesky(K)
    eps = rng.normal(size=(batch_size, x_test.shape[0]))
    return R.dot(eps.T).T
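# --- Hedged note (not from the original source) ---
# K above is the GP posterior predictive covariance at x_test given
# noisy observations at x:
#   K = Ktt - Kxt^T (Kxx + indep_noise * I)^-1 Kxt,
# and the function returns zero-mean draws with that covariance; the
# 1e-10 jitter keeps K numerically positive definite for the Cholesky.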
def lnlike(cls, X, flux, C, mu, L):
    """
    Compute the log marginal likelihood of the data given a design matrix.

    Args:
        X (matrix): The flux design matrix.
        flux (array): The flux timeseries.
        C (scalar/vector/matrix): The data covariance matrix.
        mu (array): The prior mean of the spherical harmonic coefficients.
        L (scalar/vector/matrix): The prior covariance of the spherical
            harmonic coefficients.

    Returns:
        The log marginal likelihood of the `flux` vector conditioned on
        the design matrix `X`. This is the likelihood marginalized over
        all possible spherical harmonic vectors, which is analytically
        computable for the linear `starry` model.

    """
    # TODO: These if statements won't play well with @autocompile!!!

    # Compute the GP mean
    gp_mu = tt.dot(X, mu)

    # Compute the GP covariance
    if L.ndim == 0:
        XLX = tt.dot(X, tt.transpose(X)) * L
    elif L.ndim == 1:
        XLX = tt.dot(tt.dot(X, tt.diag(L)), tt.transpose(X))
    else:
        XLX = tt.dot(tt.dot(X, L), tt.transpose(X))

    if C.ndim == 0 or C.ndim == 1:
        gp_cov = tt.inc_subtensor(
            XLX[tuple((tt.arange(XLX.shape[0]), tt.arange(XLX.shape[0])))],
            C,
        )
    else:
        gp_cov = C + XLX

    cho_gp_cov = sla.cholesky(gp_cov)

    # Compute the marginal likelihood
    N = X.shape[0]
    r = tt.reshape(flux - gp_mu, (-1, 1))
    lnlike = -0.5 * tt.dot(tt.transpose(r), _cho_solve(cho_gp_cov, r))
    lnlike -= tt.sum(tt.log(tt.diag(cho_gp_cov)))
    lnlike -= 0.5 * N * tt.log(2 * np.pi)

    return lnlike[0, 0]
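# --- Hedged scipy cross-check (not part of the original source) ---
# For dense C and L, the quantity computed by `lnlike` is just the
# Gaussian log-density N(flux | X mu, C + X L X^T). A one-liner for
# validating small cases; the helper name `lnlike_np` is illustrative:
import numpy as np
from scipy.stats import multivariate_normal

def lnlike_np(X, flux, C, mu, L):
    return multivariate_normal(mean=X @ mu, cov=C + X @ L @ X.T).logpdf(flux)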
def cholesky(square_mat):
    """
    cholesky performs a Cholesky decomposition on a Keras variable

    :param square_mat: a square positive definite matrix
    """
    if K.backend() == 'tensorflow':
        import tensorflow as tf
        L = tf.cholesky(square_mat)
        return L
    else:
        import theano.tensor.slinalg as alg
        L = alg.cholesky(square_mat)
        return L
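# --- Hedged usage sketch (not part of the original source) ---
# e.g. checking that the factor reproduces the input matrix, assuming a
# working Keras backend:
#
#   import numpy as np
#   from keras import backend as K
#   A = K.variable(np.array([[4.0, 2.0], [2.0, 3.0]]))
#   L_val = K.eval(cholesky(A))
#   assert np.allclose(L_val @ L_val.T, [[4.0, 2.0], [2.0, 3.0]])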
def test_cholesky_grad():
    pytest.importorskip("scipy")
    rng = np.random.RandomState(utt.fetch_seed())
    r = rng.randn(5, 5).astype(config.floatX)

    # The dots are inside the graph since Cholesky needs separable matrices
    # Check the default.
    utt.verify_grad(lambda r: cholesky(r.dot(r.T)), [r], 3, rng)
    # Explicit lower-triangular.
    utt.verify_grad(lambda r: Cholesky(lower=True)(r.dot(r.T)), [r], 3, rng)
    # Explicit upper-triangular.
    utt.verify_grad(lambda r: Cholesky(lower=False)(r.dot(r.T)), [r], 3, rng)
def setUp(self):
    super(test_MatrixInverseCholesky, self).setUp()
    self.op_class = MatrixInverseCholesky
    self.op = MatrixInverseCholesky(lower=True)
    self.dtype = config.floatX
    self.A = theano.tensor.matrix("A", self.dtype)
    self.L = cholesky(self.A)
    self.B = theano.tensor.matrix(self.dtype)
    self.dim = 5
    self.B_cols = 2

    self.rng = numpy.random.RandomState(utt.fetch_seed())
    self.A_mat = numpy.asarray(self.rng.rand(self.dim, self.dim),
                               dtype=self.dtype)
    self.A_mat = self.A_mat.T.dot(self.A_mat)
    self.B_mat = numpy.asarray(self.rng.rand(self.dim, self.B_cols),
                               dtype=self.dtype)
    self.L_mat = scipy.linalg.cholesky(self.A_mat, lower=True)
def test_local_lift_cholesky():
    if not cusolver_available:
        raise SkipTest('No cuSolver')
    A = tensor.fmatrix()
    o = slinalg.cholesky(A)
    f_cpu = theano.function([A], o, mode=mode_without_gpu)
    f_gpu = theano.function([A], o, mode=mode_with_gpu)
    assert not any(isinstance(n.op, slinalg.Cholesky)
                   for n in f_gpu.maker.fgraph.apply_nodes)
    # GpuCholesky op in this graph should be inplace (as its input is not
    # reused by any other op).
    assert any(isinstance(n.op, GpuCholesky) and n.op.inplace
               for n in f_gpu.maker.fgraph.apply_nodes)
    M_val = np.random.normal(size=(3, 3)).astype("float32")
    # A = M.dot(M.T) will be positive definite for all non-singular M
    A_val = M_val.dot(M_val.T)
    utt.assert_allclose(f_cpu(A_val), f_gpu(A_val))
def test_gpu_cholesky_not_inplace():
    if not cusolver_available:
        raise SkipTest('No cuSolver')
    A = tensor.fmatrix()
    A_squared = A**2
    B = slinalg.cholesky(A_squared)
    D = B + A_squared
    f_cpu = theano.function([A], D, mode=mode_without_gpu)
    f_gpu = theano.function([A], D, mode=mode_with_gpu)
    # GpuCholesky op in this graph should NOT be inplace (as its input is
    # reused by another op)
    count_cholesky_not_inplace = len([
        n.op for n in f_gpu.maker.fgraph.apply_nodes
        if isinstance(n.op, GpuCholesky) and not n.op.inplace
    ])
    assert count_cholesky_not_inplace == 1, count_cholesky_not_inplace
    M_val = np.random.normal(size=(3, 3)).astype("float32")
    # A = M.dot(M.T) will be positive definite for all non-singular M
    A_val = M_val.dot(M_val.T)
    utt.assert_allclose(f_cpu(A_val), f_gpu(A_val))
def setUp(self):
    super(test_SolveCholesky, self).setUp()
    self.op_class = SolveCholesky
    self.op = SolveCholesky()
    self.dtype = config.floatX
    self.A = theano.tensor.matrix(self.dtype)
    self.L = cholesky(self.A)
    self.B = theano.tensor.matrix(self.dtype)
    self.b = theano.tensor.vector(self.dtype)
    self.dim = 5

    rng = numpy.random.RandomState(utt.fetch_seed())
    self.A_mat = numpy.asarray(rng.rand(self.dim, self.dim),
                               dtype=self.dtype)
    self.A_mat = self.A_mat.T.dot(self.A_mat)
    self.B_mat = numpy.asarray(rng.rand(self.dim, self.dim),
                               dtype=self.dtype)
    self.b_vec = numpy.asarray(rng.rand(self.dim), dtype=self.dtype)
    self.L_mat = scipy.linalg.cholesky(self.A_mat, lower=True)
def test_cholesky_grad():
    if not imported_scipy:
        raise SkipTest("Scipy needed for the Cholesky op.")
    rng = np.random.RandomState(utt.fetch_seed())
    r = rng.randn(5, 5).astype(config.floatX)

    # The dots are inside the graph since Cholesky needs separable matrices
    # Check the default.
    yield (lambda: utt.verify_grad(lambda r: cholesky(r.dot(r.T)),
                                   [r], 3, rng))
    # Explicit lower-triangular.
    yield (lambda: utt.verify_grad(lambda r: Cholesky(lower=True)(r.dot(r.T)),
                                   [r], 3, rng))
    # Explicit upper-triangular.
    yield (lambda: utt.verify_grad(lambda r: Cholesky(lower=False)(r.dot(r.T)),
                                   [r], 3, rng))
def linear_mmd2_and_hotelling(X, Y, biased=True, reg=0):
    if not biased:
        raise ValueError("linear_mmd2_and_hotelling only works for biased est")
    n = X.shape[0]
    p = X.shape[1]

    Z = X - Y
    Z_bar = Z.mean(axis=0)
    mmd2 = Z_bar.dot(Z_bar)

    Z_cent = Z - Z_bar
    S = Z_cent.T.dot(Z_cent) / (n - 1)

    # z' inv(S) z = z' inv(L L') z = z' inv(L)' inv(L) z = ||inv(L) z||^2
    L = slinalg.cholesky(S + reg * T.eye(p))
    Linv_Z_bar = slinalg.solve_lower_triangular(L, Z_bar)  # happens on the CPU!
    lambda_ = n * Linv_Z_bar.dot(Linv_Z_bar)
    return mmd2, lambda_
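# --- Hedged numpy cross-check (not part of the original source) ---
# The comment above relies on the identity z' inv(S) z = ||inv(L) z||^2
# for S = L L'. Verifying it in numpy/scipy:
import numpy as np
from scipy.linalg import solve_triangular

rng = np.random.default_rng(1)
M = rng.normal(size=(4, 4))
S = M @ M.T + np.eye(4)       # positive definite
z = rng.normal(size=4)

L = np.linalg.cholesky(S)
u = solve_triangular(L, z, lower=True)
assert np.allclose(u @ u, z @ np.linalg.solve(S, z))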
def _indvdl_gg(
        hparams, std_x, n_samples, L_cov, Normal, Gamma, Deterministic,
        sgn, gamma, floatX, cholesky, tt, verbose):
    # Uniform distribution on sphere
    gs = Normal('gs', np.float32(0.0), np.float32(1.0),
                shape=(n_samples, 2), dtype=floatX)
    ss = Deterministic('ss', gs + sgn(sgn(gs) + np.float32(1e-10))
                             * np.float32(1e-10))
    ns = Deterministic('ns', ss.norm(L=2, axis=1)[:, np.newaxis])
    us = Deterministic('us', ss / ns)

    # Scaling s.t. variance to 1
    n = 2  # dimension
    beta = np.float32(hparams['beta_coeff'])
    m = n * gamma(0.5 * n / beta) \
        / (2 ** (1 / beta) * gamma((n + 2) / (2 * beta)))
    L_cov_ = (np.sqrt(m) * cholesky(L_cov)).astype(floatX)

    # Scaling to v_indvdls
    scale1 = np.float32(std_x[0] * hparams['v_indvdl_1'])
    scale2 = np.float32(std_x[1] * hparams['v_indvdl_2'])
    # set_subtensor returns a new variable; the result must be kept
    # (the original discarded it, so the scaling had no effect)
    L_cov_ = tt.set_subtensor(L_cov_[0, :], L_cov_[0, :] * scale1)
    L_cov_ = tt.set_subtensor(L_cov_[1, :], L_cov_[1, :] * scale2)

    # Draw samples
    ts = Gamma(
        'ts', alpha=np.float32(n / (2 * beta)), beta=np.float32(.5),
        shape=n_samples, dtype=floatX
    )[:, np.newaxis]
    mus_ = Deterministic(
        'mus_', ts**(np.float32(0.5 / beta)) * us.dot(L_cov_)
    )
    mu1s_ = mus_[:, 0]
    mu2s_ = mus_[:, 1]

    if 10 <= verbose:
        print('GG for individual effect')
        print('gs.dtype = {}'.format(gs.dtype))
        print('ss.dtype = {}'.format(ss.dtype))
        print('ns.dtype = {}'.format(ns.dtype))
        print('us.dtype = {}'.format(us.dtype))
        print('ts.dtype = {}'.format(ts.dtype))

    return mu1s_, mu2s_
def test_cholesky_and_cholesky_grad_shape():
    if not imported_scipy:
        raise SkipTest("Scipy needed for the Cholesky op.")
    rng = numpy.random.RandomState(utt.fetch_seed())
    x = tensor.matrix()
    for l in (cholesky(x), Cholesky(lower=True)(x), Cholesky(lower=False)(x)):
        f_chol = theano.function([x], l.shape)
        g = tensor.grad(l.sum(), x)
        f_cholgrad = theano.function([x], g.shape)
        topo_chol = f_chol.maker.fgraph.toposort()
        topo_cholgrad = f_cholgrad.maker.fgraph.toposort()
        if config.mode != "FAST_COMPILE":
            assert sum([node.op.__class__ == Cholesky
                        for node in topo_chol]) == 0
            assert sum([node.op.__class__ == CholeskyGrad
                        for node in topo_cholgrad]) == 0
        for shp in [2, 3, 5]:
            m = numpy.cov(rng.randn(shp, shp + 10)).astype(config.floatX)
            yield numpy.testing.assert_equal, f_chol(m), (shp, shp)
            yield numpy.testing.assert_equal, f_cholgrad(m), (shp, shp)
def test_cholesky():
    if not imported_scipy:
        raise SkipTest("Scipy needed for the Cholesky op.")
    rng = np.random.RandomState(utt.fetch_seed())
    r = rng.randn(5, 5).astype(config.floatX)
    pd = np.dot(r, r.T)
    x = tensor.matrix()
    chol = cholesky(x)

    # Check the default.
    ch_f = function([x], chol)
    yield check_lower_triangular, pd, ch_f
    # Explicit lower-triangular.
    chol = Cholesky(lower=True)(x)
    ch_f = function([x], chol)
    yield check_lower_triangular, pd, ch_f
    # Explicit upper-triangular.
    chol = Cholesky(lower=False)(x)
    ch_f = function([x], chol)
    yield check_upper_triangular, pd, ch_f
def test_cholesky_and_cholesky_grad_shape():
    pytest.importorskip("scipy")
    rng = np.random.RandomState(utt.fetch_seed())
    x = tensor.matrix()
    for l in (cholesky(x), Cholesky(lower=True)(x), Cholesky(lower=False)(x)):
        f_chol = theano.function([x], l.shape)
        g = tensor.grad(l.sum(), x)
        f_cholgrad = theano.function([x], g.shape)
        topo_chol = f_chol.maker.fgraph.toposort()
        topo_cholgrad = f_cholgrad.maker.fgraph.toposort()
        if config.mode != "FAST_COMPILE":
            assert sum([node.op.__class__ == Cholesky
                        for node in topo_chol]) == 0
            assert sum([node.op.__class__ == CholeskyGrad
                        for node in topo_cholgrad]) == 0
        for shp in [2, 3, 5]:
            m = np.cov(rng.randn(shp, shp + 10)).astype(config.floatX)
            np.testing.assert_equal(f_chol(m), (shp, shp))
            np.testing.assert_equal(f_cholgrad(m), (shp, shp))
def test_cholesky():
    if not imported_scipy:
        raise SkipTest("Scipy needed for the Cholesky op.")
    rng = numpy.random.RandomState(utt.fetch_seed())
    r = rng.randn(5, 5).astype(config.floatX)
    pd = numpy.dot(r, r.T)
    x = tensor.matrix()
    chol = cholesky(x)

    # Check the default.
    ch_f = function([x], chol)
    yield check_lower_triangular, pd, ch_f
    # Explicit lower-triangular.
    chol = Cholesky(lower=True)(x)
    ch_f = function([x], chol)
    yield check_lower_triangular, pd, ch_f
    # Explicit upper-triangular.
    chol = Cholesky(lower=False)(x)
    ch_f = function([x], chol)
    yield check_upper_triangular, pd, ch_f
def sample_covariance_theano(mean, covariance):
    # http://scicomp.stackexchange.com/q/22111/19265
    srng = RandomStreams(seed=481)
    random = srng.normal(mean.shape)
    decomp = slinalg.cholesky(covariance)
    return T.dot(decomp, random) + mean
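# --- Hedged usage sketch (not part of the original source) ---
# Compiling the sampler and checking the empirical covariance of the
# draws against the requested one (assumes theano is importable):
import numpy as np
import theano
import theano.tensor as T

mean = T.vector("mean")
covariance = T.matrix("covariance")
sample = theano.function([mean, covariance],
                         sample_covariance_theano(mean, covariance))

m = np.zeros(2)
C = np.array([[2.0, 0.3], [0.3, 1.0]])
draws = np.stack([sample(m, C) for _ in range(5000)])
# np.cov(draws.T) should approach C as the number of draws grows.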