def sample_y(X, cov, noise_var, yd, sparse_lscales=4.0):
    """Draw yd correlated sample vectors from a zero-mean GP prior at the points X.

    Samples y ~ N(0, K + noise_var*I) by multiplying standard normal draws by a
    Cholesky factor of the covariance.  For small n a dense factorization is
    used; for large n the kernel is built sparsely (entries beyond
    sparse_lscales lengthscales are dropped) and factored with CHOLMOD.

    X            : (n, d) array of input locations.
    cov          : kernel description (dfn_str/dfn_params/wfn_str/wfn_params).
    noise_var    : observation noise variance added to the diagonal.
    yd           : number of independent sample columns to draw.
    sparse_lscales : cutoff distance (in lengthscales) for the sparse kernel.

    Returns an (n, yd) array of samples.
    """
    n = X.shape[0]
    if n < 40000:
        # Dense path: build the full covariance and use a jittered Cholesky.
        from gpy_linalg import jitchol
        KK = mcov(X, cov, noise_var)
        n = KK.shape[0]
        L = jitchol(KK)
        Z = np.random.randn(X.shape[0], yd)
        y = np.dot(L, Z)
    else:
        # Sparse path: a dense n x n factorization would not fit in memory.
        import scipy.sparse
        # BUGFIX: must import the cholmod submodule explicitly; plain
        # "import scikits.sparse" leaves scikits.sparse.cholmod unresolved.
        import scikits.sparse.cholmod
        from treegp.cover_tree import VectorTree
        import pyublas
        ptree = VectorTree(X, 1, cov.dfn_str, cov.dfn_params, cov.wfn_str, cov.wfn_params)
        # Rows of `entries` are (i, j, K_ij) triples for nearby point pairs.
        entries = ptree.sparse_training_kernel_matrix(X, sparse_lscales, False)
        KKsparse = scipy.sparse.coo_matrix((entries[:,2], (entries[:,0], entries[:,1])), shape=(n,n), dtype=float)
        KKsparse = KKsparse + noise_var * scipy.sparse.eye(n)
        # attempt sparsity cause nothing else is going to work
        factor = scikits.sparse.cholmod.cholesky(KKsparse)
        L = factor.L()
        P = factor.P()
        # CHOLMOD factors a permuted matrix; undo the fill-reducing
        # permutation so samples line up with the original point order.
        Pinv = np.argsort(P)
        z = np.random.randn(n, yd)
        y = np.array((L * z)[Pinv])
    return y
def test_cover_tree_multiplication(self):
    """Check a tree-accelerated weighted sum against precomputed reference values."""
    # Build 100 widely separated copies of the same 10-point cluster.
    np.random.seed(6)
    base_cluster = np.random.normal(size=(10, 2)) * 10
    centers = np.random.normal(size=(100, 2)) * 1000
    X = np.zeros((1000, 2))
    for idx, center in enumerate(centers):
        X[10 * idx:10 * (idx + 1), :] = base_cluster + center

    # Construct a cover tree with Euclidean distance and an SE weight function.
    dist_params = np.array((1.0, 1.0), dtype=float)
    wt_params = np.array((1.0,), dtype=float)
    tree = VectorTree(X, 1, "euclidean", dist_params, 'se', wt_params)

    # Attach a repeating 1..10 value to every point.
    values = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10] * 100, dtype=float)
    tree.set_v(0, values)

    # Query at one of the cluster centers and compare against the reference
    # result; also pin the number of kernel evaluations the tree performed.
    query_pt = np.matrix(centers[29, :], dtype=float, copy=True)
    kv_tree = tree.weighted_sum(0, query_pt, 1e-4)
    self.assertAlmostEqual(0.893282181527, kv_tree, places=4)
    self.assertEqual(tree.fcalls, 54)
def __setstate__(self, d):
    """Restore pickled state, then rebuild the (non-pickled) predict_tree."""
    self.__dict__ = d
    # A single all-zeros point suffices: the tree is used for kernel
    # evaluations parameterized by self.cov, not for its stored data.
    placeholder = np.zeros((1, self.X.shape[1]), dtype=float)
    self.predict_tree = VectorTree(placeholder, 1, self.cov.dfn_str,
                                   self.cov.dfn_params, self.cov.wfn_str,
                                   self.cov.wfn_params)
def __init__(self, X, Y, block_fn, cov, noise_var, kernelized=False, dy=None, neighbor_threshold=1e-3, nonstationary=False, nonstationary_prec=False, block_idxs=None, neighbors=None):
    """Set up a block-decomposed GP over (X, Y).

    X, Y         : training inputs and targets (Y is a kernelized Gram-style
                   matrix when kernelized=True, raw targets otherwise).
    block_fn     : maps an input array to per-block index lists.
    cov          : kernel description used to build the predict tree.
    noise_var    : observation noise variance.
    dy           : target dimensionality; required when kernelized=True.
    block_idxs / neighbors : optional precomputed structures; when omitted
                   they are derived from block_fn / neighbor_threshold.
    """
    self.X = X
    self.kernelized = bool(kernelized)
    if kernelized:
        self.YY = Y
        assert (dy is not None)
        self.dy = dy
    else:
        self.Y = Y

    # Partition the inputs into blocks unless a partition was supplied.
    self.block_idxs = block_fn(X) if block_idxs is None else block_idxs
    self.block_fn = block_fn
    self.n_blocks = len(self.block_idxs)
    self.nonstationary = nonstationary

    self.cov = cov
    self.noise_var = noise_var
    # Dummy single-point tree: used only for kernel evaluations.
    placeholder = np.zeros((1, self.X.shape[1]), dtype=float)
    self.predict_tree = VectorTree(placeholder, 1, cov.dfn_str, cov.dfn_params,
                                   cov.wfn_str, cov.wfn_params)

    if neighbors is None:
        self.compute_neighbors(threshold=neighbor_threshold)
    else:
        self.neighbors = neighbors
    self.compute_neighbor_count()
    self.neighbor_dict = symmetrize_neighbors(self.neighbors)
    self.neighbor_threshold = neighbor_threshold
def update_covs(self, covs):
    """Install new hyperparameters from a packed row of covs.

    covs[0] is laid out as [noise_var, weight-fn scale, lengthscales...];
    the distance/weight function names are carried over from the current cov.
    """
    noise_var, wfn_scale = covs[0, :2]
    lengthscales = covs[0, 2:]
    self.cov = GPCov(wfn_params=[wfn_scale], dfn_params=lengthscales,
                     dfn_str=self.cov.dfn_str, wfn_str=self.cov.wfn_str)
    self.noise_var = noise_var
    # Rebuild the dummy predict tree so it picks up the new hyperparameters.
    placeholder = np.zeros((1, self.X.shape[1]), dtype=float)
    self.predict_tree = VectorTree(placeholder, 1, self.cov.dfn_str,
                                   self.cov.dfn_params, self.cov.wfn_str,
                                   self.cov.wfn_params)
def train_predictor(self, test_cov=None, Y=None):
    """Precompute per-block inverse kernels and return a closure that predicts
    at new points by combining per-block GP predictions in precision
    (inverse-covariance) space.

    test_cov : optional kernel to use for the test-point prior; defaults to
               self.cov (and reuses self.predict_tree).
    Y        : optional targets; defaults to self.Y.
    Returns  : predict(Xstar, test_noise_var=0.0, local=False)
               -> (mean, covariance) for the points Xstar.
    """
    if Y is None:
        Y = self.Y
    if test_cov is None:
        test_cov = self.cov
        test_ptree = self.predict_tree
    else:
        # Build a throwaway single-point tree just to evaluate test_cov kernels.
        dummy_X = np.array([ [ 0.0, ] * self.X.shape[1], ], dtype=float)
        test_ptree = VectorTree(dummy_X, 1, test_cov.dfn_str, test_cov.dfn_params, test_cov.wfn_str, test_cov.wfn_params)

    # Per-block K^-1 and alpha = K^-1 y, computed once up front.
    block_Kinvs = []
    block_Alphas = []
    for i in range(self.n_blocks):
        #i_start, i_end = self.block_boundaries[i]
        idxs = self.block_idxs[i]
        X = self.X[idxs]
        blockY = Y[idxs]
        K = self.kernel(X, block=i if self.nonstationary else None)
        Kinv = np.linalg.inv(K)
        Alpha = np.dot(Kinv, blockY)
        block_Kinvs.append(Kinv)
        block_Alphas.append(Alpha)

    def predict(Xstar, test_noise_var=0.0, local=False):
        # Start from the prior on the test points, held in precision form so
        # that per-block "messages" can simply be added in.
        prior_cov = test_ptree.kernel_matrix(Xstar, Xstar, False)
        prior_cov += np.eye(prior_cov.shape[0]) * test_noise_var
        prior_prec = np.linalg.inv(prior_cov)
        # NOTE: prior_mean accumulates precision-weighted means, not raw means;
        # it is converted back by final_cov at the end.
        prior_mean = np.zeros((Xstar.shape[0], Y.shape[1]))
        test_block_idxs = self.block_fn(Xstar)
        """
        if local:
            nearest = np.argmin([np.min(pair_distances(Xstar, self.X[idxs])) for idxs in self.block_idxs])
            neighbors = [nearest,]
        else:
            neighbors = range(self.n_blocks)
        """
        # Only blocks containing test points, plus their neighbors, contribute.
        source_blocks = set()
        for i, idxs in enumerate(test_block_idxs):
            if len(idxs) == 0:
                continue
            source_blocks.add(i)
            for j in self.neighbor_dict[i]:
                source_blocks.add(j)
        for i in source_blocks:
            idxs = self.block_idxs[i]
            X = self.X[idxs]
            # Nonstationary models keep a separate tree/noise level per block.
            ptree = self.block_trees[i] if self.nonstationary else self.predict_tree
            nv = self.block_covs[i][0] if self.nonstationary else self.noise_var
            Kinv = block_Kinvs[i]
            Kstar = ptree.kernel_matrix(Xstar, X, False)
            Kss = ptree.kernel_matrix(Xstar, Xstar, False)
            if test_noise_var > 0:
                Kss += np.eye(Kss.shape[0]) * nv
            # Standard GP posterior for this block alone.
            mean = np.dot(Kstar, block_Alphas[i])
            cov = Kss - np.dot(Kstar, np.dot(Kinv, Kstar.T))
            prec = np.linalg.inv(cov)
            pp = np.linalg.inv(Kss)
            # The block's message is its posterior precision minus the prior
            # precision, so the prior is not double-counted across blocks.
            message_prec = prec - pp
            weighted_mean = np.dot(prec, mean)
            prior_mean += weighted_mean
            prior_prec += message_prec
        # Convert the accumulated precision-space quantities back.
        final_cov = np.linalg.inv(prior_prec)
        final_mean = np.dot(final_cov, prior_mean)
        return final_mean, final_cov

    return predict
def gaussian_llgrad_sparse(self, X, Y, grad_X=False, grad_cov=False, max_distance=5.0): import scipy.sparse import scipy.sparse.linalg import scikits.sparse.cholmod t0 = time.time() t0 = time.time() n, dx = X.shape dy = Y.shape[1] gradX = np.zeros(()) gradC = np.zeros(()) if n == 0: if grad_X: gradX = np.zeros(X.shape) if grad_cov: ncov = 2 + len(self.cov.dfn_params) gradC = np.zeros((ncov, )) return 0.0, gradX, gradC tree = VectorTree(X, 1, self.cov.dfn_str, self.cov.dfn_params, self.cov.wfn_str, self.cov.wfn_params) t05 = time.time() entries = tree.sparse_training_kernel_matrix(X, max_distance, False) t06 = time.time() nzr, nzc, entries_K = np.array(entries[:, 0], dtype=np.int32), np.array( entries[:, 1], dtype=np.int32), entries[:, 2] t07 = time.time() distances = tree.sparse_distances(X, X, nzr, nzc) t08 = time.time() spK = scipy.sparse.coo_matrix((entries_K, (nzr, nzc)), shape=(n, n), dtype=float) spK = (spK + self.noise_var * scipy.sparse.eye(spK.shape[0])).tocsc() t1 = time.time() print " sparse entires %.3f copy %.03f distances %.03f matrix %.03f" % ( t06 - t05, t07 - t06, t08 - t07, t1 - t08) factor = scikits.sparse.cholmod.cholesky(spK) t11 = time.time() Alpha = factor(Y) t12 = time.time() prec = factor.inv() t13 = time.time() #pprec = pdinv(spK.toarray()) t133 = time.time() logdet = factor.logdet() t14 = time.time() print " sparse factor %.3f alpha %.3f inv %.3f pinv %.3f logdet %.3f" % ( t11 - t1, t12 - t11, t13 - t12, t133 - t13, t14 - t133) unpermuted_L = factor.L() P = factor.P() Pinv = np.argsort(P) L = unpermuted_L[Pinv][:, Pinv] ll = -.5 * np.sum(Y * Alpha) ll += -.5 * dy * logdet ll += -.5 * dy * n * np.log(2 * np.pi) t2 = time.time() if grad_X: gradX = np.zeros((n, dx)) for i in range(dx): dK_entries = tree.sparse_kernel_deriv_wrt_xi( X, i, nzr, nzc, distances) sdK = scipy.sparse.coo_matrix((dK_entries, (nzr, nzc)), shape=spK.shape, dtype=float).tocsc() d_logdet = -dy * np.asarray( sdK.multiply(prec).sum(axis=1)).reshape((-1, )) #d_logdet = -dy 
* factor(sdK).diagonal() """ LL = L.toarray() V = 1.0/LL VV = V.reshape((-1,)) nans = np.isinf(VV) VV[nans] = 0 dK = sdK.toarray() d_logdet2 = np.sum(dK * V, axis=1) import pdb; pdb.set_trace() """ dK_alpha = sdK * Alpha scaled = dK_alpha * Alpha gradX[:, i] = d_logdet + np.sum(scaled, axis=1) t3 = time.time() if grad_cov: ncov = 2 + len(self.cov.dfn_params) gradC = np.zeros((ncov, )) for i in range(ncov): if (i == 0): dKdi = scipy.sparse.eye(X.shape[0]) elif (i == 1): if (len(self.cov.wfn_params) != 1): raise ValueError( 'gradient computation currently assumes just a single scaling parameter for weight function, but currently wfn_params=%s' % cov.wfn_params) dKdi = (spK - (self.noise_var * scipy.sparse.eye(spK.shape[0])) ) / self.cov.wfn_params[0] else: dK_entries = tree.sparse_kernel_deriv_wrt_i( X, X, nzr, nzc, i - 2, distances) #dKdi = self.dKdi(X, i, block=block_i) dKdi = scipy.sparse.coo_matrix((dK_entries, (nzr, nzc)), shape=spK.shape, dtype=float).tocsc() dlldi = .5 * np.sum(np.multiply(Alpha, dKdi * Alpha)) dlldi -= .5 * dy * dKdi.multiply( prec).sum() # factor(dKdi).diagonal().sum() gradC[i] = dlldi t4 = time.time() print "sparse tree %.4f kernel %.3f ll %.3f gradX %.3f gradC %.3f total %.3f" % ( t05 - t0, t1 - t05, t2 - t1, t3 - t2, t4 - t3, t4 - t0) return ll, gradX, gradC
def gaussian_llgrad_sparse(self, X, Y, grad_X = False, grad_cov=False, max_distance=5.0): import scipy.sparse import scipy.sparse.linalg import scikits.sparse.cholmod t0 = time.time() t0 = time.time() n, dx = X.shape dy = Y.shape[1] gradX = np.zeros(()) gradC = np.zeros(()) if n==0: if grad_X: gradX = np.zeros(X.shape) if grad_cov: ncov = 2 + len(self.cov.dfn_params) gradC = np.zeros((ncov,)) return 0.0, gradX, gradC tree = VectorTree(X, 1, self.cov.dfn_str, self.cov.dfn_params, self.cov.wfn_str, self.cov.wfn_params) t05 = time.time() entries = tree.sparse_training_kernel_matrix(X, max_distance, False) t06 = time.time() nzr, nzc, entries_K = np.array(entries[:,0], dtype=np.int32), np.array(entries[:,1], dtype=np.int32), entries[:,2] t07 = time.time() distances = tree.sparse_distances(X, X, nzr, nzc); t08 = time.time() spK = scipy.sparse.coo_matrix((entries_K, (nzr, nzc)), shape=(n,n), dtype=float) spK = (spK + self.noise_var * scipy.sparse.eye(spK.shape[0])).tocsc() t1 = time.time() print " sparse entires %.3f copy %.03f distances %.03f matrix %.03f" % (t06-t05, t07-t06, t08-t07, t1-t08) factor = scikits.sparse.cholmod.cholesky(spK) t11 = time.time() Alpha = factor(Y) t12 = time.time() prec = factor.inv() t13 = time.time() #pprec = pdinv(spK.toarray()) t133 = time.time() logdet = factor.logdet() t14 = time.time() print " sparse factor %.3f alpha %.3f inv %.3f pinv %.3f logdet %.3f" % (t11-t1, t12-t11, t13-t12, t133-t13, t14-t133) unpermuted_L = factor.L() P = factor.P() Pinv = np.argsort(P) L = unpermuted_L[Pinv][:,Pinv] ll = -.5 * np.sum(Y*Alpha) ll += -.5 * dy * logdet ll += -.5 * dy * n * np.log(2*np.pi) t2 = time.time() if grad_X: gradX = np.zeros((n, dx)) for i in range(dx): dK_entries = tree.sparse_kernel_deriv_wrt_xi(X, i, nzr, nzc, distances) sdK = scipy.sparse.coo_matrix((dK_entries, (nzr, nzc)), shape=spK.shape, dtype=float).tocsc() d_logdet = -dy * np.asarray(sdK.multiply(prec).sum(axis=1)).reshape((-1,)) #d_logdet = -dy * factor(sdK).diagonal() """ LL = 
L.toarray() V = 1.0/LL VV = V.reshape((-1,)) nans = np.isinf(VV) VV[nans] = 0 dK = sdK.toarray() d_logdet2 = np.sum(dK * V, axis=1) import pdb; pdb.set_trace() """ dK_alpha = sdK * Alpha scaled = dK_alpha * Alpha gradX[:, i] = d_logdet + np.sum(scaled, axis=1) t3 = time.time() if grad_cov: ncov = 2 + len(self.cov.dfn_params) gradC = np.zeros((ncov,)) for i in range(ncov): if (i == 0): dKdi = scipy.sparse.eye(X.shape[0]) elif (i == 1): if (len(self.cov.wfn_params) != 1): raise ValueError('gradient computation currently assumes just a single scaling parameter for weight function, but currently wfn_params=%s' % cov.wfn_params) dKdi = (spK - (self.noise_var * scipy.sparse.eye(spK.shape[0]))) / self.cov.wfn_params[0] else: dK_entries = tree.sparse_kernel_deriv_wrt_i(X, X, nzr, nzc, i-2, distances) #dKdi = self.dKdi(X, i, block=block_i) dKdi = scipy.sparse.coo_matrix((dK_entries, (nzr, nzc)), shape=spK.shape, dtype=float).tocsc() dlldi = .5 * np.sum(np.multiply(Alpha,dKdi* Alpha)) dlldi -= .5 * dy * dKdi.multiply(prec).sum() # factor(dKdi).diagonal().sum() gradC[i] = dlldi t4 = time.time() print "sparse tree %.4f kernel %.3f ll %.3f gradX %.3f gradC %.3f total %.3f" % (t05-t0, t1-t05, t2-t1, t3-t2, t4-t3, t4-t0) return ll, gradX, gradC