def sample_y(X, cov, noise_var, yd, sparse_lscales=4.0):
    """Draw yd correlated sample columns from a zero-mean GP prior.

    Samples y ~ N(0, K + noise_var*I), where K is the kernel matrix of X
    under `cov`. For n < 40000 a dense (jittered) Cholesky is used; for
    larger n the kernel is truncated at `sparse_lscales` lengthscales and
    factored sparsely with CHOLMOD.

    Args:
      X: (n, d) array of input locations.
      cov: covariance spec exposing dfn_str/dfn_params/wfn_str/wfn_params.
      noise_var: iid noise variance added to the kernel diagonal.
      yd: number of independent sample columns to draw.
      sparse_lscales: truncation radius (in lengthscales) for the sparse
        kernel used on the large-n path.

    Returns:
      (n, yd) array of sampled function values.
    """
    n = X.shape[0]
    if n < 40000:
        # Dense path: exact Cholesky of the full covariance.
        from gpy_linalg import jitchol
        KK = mcov(X, cov, noise_var)
        L = jitchol(KK)  # jittered Cholesky guards against near-singular KK
        Z = np.random.randn(n, yd)
        y = np.dot(L, Z)
    else:
        # Sparse path: nothing else is going to work at this scale.
        import scipy.sparse
        # BUG FIX: cholmod is a submodule and must be imported explicitly;
        # a bare `import scikits.sparse` leaves scikits.sparse.cholmod unset.
        import scikits.sparse.cholmod
        from treegp.cover_tree import VectorTree
        import pyublas

        ptree = VectorTree(X, 1, cov.dfn_str, cov.dfn_params,
                           cov.wfn_str, cov.wfn_params)
        entries = ptree.sparse_training_kernel_matrix(X, sparse_lscales, False)
        KKsparse = scipy.sparse.coo_matrix(
            (entries[:, 2], (entries[:, 0], entries[:, 1])),
            shape=(n, n), dtype=float)
        KKsparse = KKsparse + noise_var * scipy.sparse.eye(n)

        factor = scikits.sparse.cholmod.cholesky(KKsparse)
        L = factor.L()
        P = factor.P()
        Pinv = np.argsort(P)  # inverse of CHOLMOD's fill-reducing permutation
        z = np.random.randn(n, yd)
        # CHOLMOD factors P*K*P' = L*L', so un-permute the sampled rows.
        y = np.array((L * z)[Pinv])
    return y
def gaussian_llgrad_sparse(self, X, Y, grad_X=False, grad_cov=False, max_distance=5.0): import scipy.sparse import scipy.sparse.linalg import scikits.sparse.cholmod t0 = time.time() t0 = time.time() n, dx = X.shape dy = Y.shape[1] gradX = np.zeros(()) gradC = np.zeros(()) if n == 0: if grad_X: gradX = np.zeros(X.shape) if grad_cov: ncov = 2 + len(self.cov.dfn_params) gradC = np.zeros((ncov, )) return 0.0, gradX, gradC tree = VectorTree(X, 1, self.cov.dfn_str, self.cov.dfn_params, self.cov.wfn_str, self.cov.wfn_params) t05 = time.time() entries = tree.sparse_training_kernel_matrix(X, max_distance, False) t06 = time.time() nzr, nzc, entries_K = np.array(entries[:, 0], dtype=np.int32), np.array( entries[:, 1], dtype=np.int32), entries[:, 2] t07 = time.time() distances = tree.sparse_distances(X, X, nzr, nzc) t08 = time.time() spK = scipy.sparse.coo_matrix((entries_K, (nzr, nzc)), shape=(n, n), dtype=float) spK = (spK + self.noise_var * scipy.sparse.eye(spK.shape[0])).tocsc() t1 = time.time() print " sparse entires %.3f copy %.03f distances %.03f matrix %.03f" % ( t06 - t05, t07 - t06, t08 - t07, t1 - t08) factor = scikits.sparse.cholmod.cholesky(spK) t11 = time.time() Alpha = factor(Y) t12 = time.time() prec = factor.inv() t13 = time.time() #pprec = pdinv(spK.toarray()) t133 = time.time() logdet = factor.logdet() t14 = time.time() print " sparse factor %.3f alpha %.3f inv %.3f pinv %.3f logdet %.3f" % ( t11 - t1, t12 - t11, t13 - t12, t133 - t13, t14 - t133) unpermuted_L = factor.L() P = factor.P() Pinv = np.argsort(P) L = unpermuted_L[Pinv][:, Pinv] ll = -.5 * np.sum(Y * Alpha) ll += -.5 * dy * logdet ll += -.5 * dy * n * np.log(2 * np.pi) t2 = time.time() if grad_X: gradX = np.zeros((n, dx)) for i in range(dx): dK_entries = tree.sparse_kernel_deriv_wrt_xi( X, i, nzr, nzc, distances) sdK = scipy.sparse.coo_matrix((dK_entries, (nzr, nzc)), shape=spK.shape, dtype=float).tocsc() d_logdet = -dy * np.asarray( sdK.multiply(prec).sum(axis=1)).reshape((-1, )) #d_logdet = -dy 
* factor(sdK).diagonal() """ LL = L.toarray() V = 1.0/LL VV = V.reshape((-1,)) nans = np.isinf(VV) VV[nans] = 0 dK = sdK.toarray() d_logdet2 = np.sum(dK * V, axis=1) import pdb; pdb.set_trace() """ dK_alpha = sdK * Alpha scaled = dK_alpha * Alpha gradX[:, i] = d_logdet + np.sum(scaled, axis=1) t3 = time.time() if grad_cov: ncov = 2 + len(self.cov.dfn_params) gradC = np.zeros((ncov, )) for i in range(ncov): if (i == 0): dKdi = scipy.sparse.eye(X.shape[0]) elif (i == 1): if (len(self.cov.wfn_params) != 1): raise ValueError( 'gradient computation currently assumes just a single scaling parameter for weight function, but currently wfn_params=%s' % cov.wfn_params) dKdi = (spK - (self.noise_var * scipy.sparse.eye(spK.shape[0])) ) / self.cov.wfn_params[0] else: dK_entries = tree.sparse_kernel_deriv_wrt_i( X, X, nzr, nzc, i - 2, distances) #dKdi = self.dKdi(X, i, block=block_i) dKdi = scipy.sparse.coo_matrix((dK_entries, (nzr, nzc)), shape=spK.shape, dtype=float).tocsc() dlldi = .5 * np.sum(np.multiply(Alpha, dKdi * Alpha)) dlldi -= .5 * dy * dKdi.multiply( prec).sum() # factor(dKdi).diagonal().sum() gradC[i] = dlldi t4 = time.time() print "sparse tree %.4f kernel %.3f ll %.3f gradX %.3f gradC %.3f total %.3f" % ( t05 - t0, t1 - t05, t2 - t1, t3 - t2, t4 - t3, t4 - t0) return ll, gradX, gradC
def gaussian_llgrad_sparse(self, X, Y, grad_X = False, grad_cov=False, max_distance=5.0): import scipy.sparse import scipy.sparse.linalg import scikits.sparse.cholmod t0 = time.time() t0 = time.time() n, dx = X.shape dy = Y.shape[1] gradX = np.zeros(()) gradC = np.zeros(()) if n==0: if grad_X: gradX = np.zeros(X.shape) if grad_cov: ncov = 2 + len(self.cov.dfn_params) gradC = np.zeros((ncov,)) return 0.0, gradX, gradC tree = VectorTree(X, 1, self.cov.dfn_str, self.cov.dfn_params, self.cov.wfn_str, self.cov.wfn_params) t05 = time.time() entries = tree.sparse_training_kernel_matrix(X, max_distance, False) t06 = time.time() nzr, nzc, entries_K = np.array(entries[:,0], dtype=np.int32), np.array(entries[:,1], dtype=np.int32), entries[:,2] t07 = time.time() distances = tree.sparse_distances(X, X, nzr, nzc); t08 = time.time() spK = scipy.sparse.coo_matrix((entries_K, (nzr, nzc)), shape=(n,n), dtype=float) spK = (spK + self.noise_var * scipy.sparse.eye(spK.shape[0])).tocsc() t1 = time.time() print " sparse entires %.3f copy %.03f distances %.03f matrix %.03f" % (t06-t05, t07-t06, t08-t07, t1-t08) factor = scikits.sparse.cholmod.cholesky(spK) t11 = time.time() Alpha = factor(Y) t12 = time.time() prec = factor.inv() t13 = time.time() #pprec = pdinv(spK.toarray()) t133 = time.time() logdet = factor.logdet() t14 = time.time() print " sparse factor %.3f alpha %.3f inv %.3f pinv %.3f logdet %.3f" % (t11-t1, t12-t11, t13-t12, t133-t13, t14-t133) unpermuted_L = factor.L() P = factor.P() Pinv = np.argsort(P) L = unpermuted_L[Pinv][:,Pinv] ll = -.5 * np.sum(Y*Alpha) ll += -.5 * dy * logdet ll += -.5 * dy * n * np.log(2*np.pi) t2 = time.time() if grad_X: gradX = np.zeros((n, dx)) for i in range(dx): dK_entries = tree.sparse_kernel_deriv_wrt_xi(X, i, nzr, nzc, distances) sdK = scipy.sparse.coo_matrix((dK_entries, (nzr, nzc)), shape=spK.shape, dtype=float).tocsc() d_logdet = -dy * np.asarray(sdK.multiply(prec).sum(axis=1)).reshape((-1,)) #d_logdet = -dy * factor(sdK).diagonal() """ LL = 
L.toarray() V = 1.0/LL VV = V.reshape((-1,)) nans = np.isinf(VV) VV[nans] = 0 dK = sdK.toarray() d_logdet2 = np.sum(dK * V, axis=1) import pdb; pdb.set_trace() """ dK_alpha = sdK * Alpha scaled = dK_alpha * Alpha gradX[:, i] = d_logdet + np.sum(scaled, axis=1) t3 = time.time() if grad_cov: ncov = 2 + len(self.cov.dfn_params) gradC = np.zeros((ncov,)) for i in range(ncov): if (i == 0): dKdi = scipy.sparse.eye(X.shape[0]) elif (i == 1): if (len(self.cov.wfn_params) != 1): raise ValueError('gradient computation currently assumes just a single scaling parameter for weight function, but currently wfn_params=%s' % cov.wfn_params) dKdi = (spK - (self.noise_var * scipy.sparse.eye(spK.shape[0]))) / self.cov.wfn_params[0] else: dK_entries = tree.sparse_kernel_deriv_wrt_i(X, X, nzr, nzc, i-2, distances) #dKdi = self.dKdi(X, i, block=block_i) dKdi = scipy.sparse.coo_matrix((dK_entries, (nzr, nzc)), shape=spK.shape, dtype=float).tocsc() dlldi = .5 * np.sum(np.multiply(Alpha,dKdi* Alpha)) dlldi -= .5 * dy * dKdi.multiply(prec).sum() # factor(dKdi).diagonal().sum() gradC[i] = dlldi t4 = time.time() print "sparse tree %.4f kernel %.3f ll %.3f gradX %.3f gradC %.3f total %.3f" % (t05-t0, t1-t05, t2-t1, t3-t2, t4-t3, t4-t0) return ll, gradX, gradC