def train_hyperparams(X, y, cov_main, cov_fic, noise_var, noise_prior,
                      optimize_xu=False, sparse_invert=False, save_progress=None):
    """Fit GP hyperparameters by maximizing the penalized marginal likelihood."""

    # Build the objective (negative log likelihood plus gradient), the initial
    # hyperparameter vector, optimization bounds, and helpers that map a
    # hyperparameter vector back to (noise_var, cov_main, cov_fic) objects.
    nllgrad, x0, bounds, build_gp, covs_from_vector = optimize_gp_hyperparams(
        noise_var=noise_var, cov_main=cov_main, cov_fic=cov_fic, X=X, y=y,
        noise_prior=noise_prior, optimize_Xu=optimize_xu,
        sparse_invert=sparse_invert, build_tree=False)

    def nllgrad_checkpoint(v):
        # Save the current hyperparameters before each objective evaluation so
        # a long optimization can be inspected or resumed.
        noise_var, cov_main, cov_fic = covs_from_vector(v)
        save_progress(noise_var, cov_main, cov_fic)
        return nllgrad(v)

    f_obj = nllgrad if not save_progress else nllgrad_checkpoint

    result, rounds = bfgs_bump(nllgrad=f_obj, x0=x0,
                               options={"disp": True}, bounds=bounds)
    print "optimized in", rounds, "rounds"

    noise_var, cov_main, cov_fic = covs_from_vector(result.x)
    return noise_var, cov_main, cov_fic
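# --- Hedged usage sketch (not part of the original module) -------------------
# Shows one way train_hyperparams might be driven for a single squared-
# exponential kernel with no FIC component. GPCov and its keyword arguments
# mirror the gradient tests later in this file; _LogNormalPrior, _example_train,
# and the checkpoint file name are illustrative stand-ins, not library API.

class _LogNormalPrior(object):
    # Minimal prior over the noise variance, exposing the log_p() interface
    # that train_hyperparams / choose_best_hparams expect.
    def __init__(self, mu=0.0, sigma=1.0):
        self.mu, self.sigma = mu, sigma

    def log_p(self, x):
        # Log density of a log-normal distribution with parameters (mu, sigma).
        z = (np.log(x) - self.mu) / self.sigma
        return -0.5 * z ** 2 - np.log(x * self.sigma * np.sqrt(2 * np.pi))

def _example_train(X, y):
    # A single lengthscale, as in the tests below; adjust for your inputs.
    cov_main = GPCov(wfn_params=[1.0], dfn_params=[1.0],
                     wfn_str="se", dfn_str="euclidean")

    def save_progress(noise_var, cov_main, cov_fic):
        # Illustrative checkpoint callback: any side effect (logging,
        # pickling, ...) works; the file name here is made up.
        with open("hparams_progress.log", "a") as f:
            f.write("%s %s %s\n" % (noise_var, cov_main, cov_fic))

    return train_hyperparams(X, y, cov_main=cov_main, cov_fic=None,
                             noise_var=1.0, noise_prior=_LogNormalPrior(),
                             save_progress=save_progress)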
def choose_best_hparams(covs, X, y, noise_prior, sparse_invert=False):
    """Score a list of (noise_var, cov_main, cov_fic) candidates by penalized
    marginal likelihood and return the best triple."""

    # Set up the hyperparameter machinery from the first candidate; the
    # returned closures are not needed in the scoring loop below.
    noise_var, cov_main, cov_fic = covs[0]
    nllgrad, x0, bounds, build_gp, covs_from_vector = optimize_gp_hyperparams(
        noise_var=noise_var, cov_main=cov_main, cov_fic=cov_fic, X=X, y=y,
        noise_prior=noise_prior, sparse_invert=sparse_invert, build_tree=False)

    best_ll = float("-inf")
    for (noise_var, cov_main, cov_fic) in covs:
        gp = GP(compute_ll=True, noise_var=noise_var, cov_main=cov_main,
                cov_fic=cov_fic, X=X, y=y, sparse_invert=False, build_tree=False)
        ll = gp.ll
        del gp

        # Penalize the marginal likelihood with the hyperparameter priors.
        ll += (noise_prior.log_p(noise_var) +
               (cov_main.prior_logp() if cov_main is not None else 0) +
               (cov_fic.prior_logp() if cov_fic is not None else 0))
        print "params", noise_var, cov_main, cov_fic, "got likelihood", ll

        if ll > best_ll:
            best_ll = ll
            best_noise_var = noise_var
            best_cov_main = cov_main
            best_cov_fic = cov_fic

    return best_noise_var, best_cov_main, best_cov_fic
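# --- Hedged usage sketch (not part of the original module) -------------------
# choose_best_hparams scores a list of (noise_var, cov_main, cov_fic) triples
# by penalized marginal likelihood. The grid values below and the
# _LogNormalPrior stand-in from the sketch above are illustrative only.

def _example_model_selection(X, y):
    candidates = []
    for noise_var in (0.01, 0.1, 1.0):
        for lengthscale in (0.5, 1.0, 5.0):
            cov_main = GPCov(wfn_params=[1.0], dfn_params=[lengthscale],
                             wfn_str="se", dfn_str="euclidean")
            # No FIC component in this sketch.
            candidates.append((noise_var, cov_main, None))
    return choose_best_hparams(candidates, X, y,
                               noise_prior=_LogNormalPrior())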
def _check_gradient(self, cov, eps=1e-8):
    # Analytic gradient of the log likelihood w.r.t. the hyperparameters.
    gp = GP(X=self.X, y=self.y, noise_var=self.noise_var, cov_main=cov,
            compute_ll=True, compute_grad=True)
    grad = gp.ll_grad

    nllgrad, x0, bounds, build_gp, _ = optimize_gp_hyperparams(
        X=self.X, y=self.y, noise_var=self.noise_var, cov_main=cov,
        sparse_invert=False)

    # Central-difference approximation of the same gradient, one
    # hyperparameter at a time.
    n = len(x0)
    kp = x0
    empirical_grad = np.zeros(n)
    for i in range(n):
        kp[i] -= eps
        gp1 = build_gp(kp, compute_ll=True)
        l1 = gp1.log_likelihood()
        kp[i] += 2 * eps
        gp2 = build_gp(kp, compute_ll=True)
        l2 = gp2.log_likelihood()
        kp[i] -= eps
        empirical_grad[i] = (l2 - l1) / (2 * eps)

    self.assertTrue((np.abs(grad - empirical_grad) < 1e-5).all())
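# The check above compares the analytic gradient against a symmetric (central)
# finite difference: for each hyperparameter i,
#
#     d ll / d theta_i  ~=  ( ll(theta + eps*e_i) - ll(theta - eps*e_i) ) / (2*eps)
#
# which has O(eps^2) truncation error versus O(eps) for a one-sided difference.
# A self-contained illustration of the same pattern on a plain function
# (independent of the GP classes; added here for clarity, not original code):

def _central_difference_grad(f, x, eps=1e-6):
    # Approximate the gradient of f at x by perturbing one coordinate at a time.
    x = np.asarray(x, dtype=float)
    g = np.zeros_like(x)
    for i in range(len(x)):
        xp, xm = x.copy(), x.copy()
        xp[i] += eps
        xm[i] -= eps
        g[i] = (f(xp) - f(xm)) / (2 * eps)
    return g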
def test_gradient(self):
    # Analytic gradient for the semiparametric GP (with basis functions).
    grad = self.gp.ll_grad

    nllgrad, x0, bounds, build_gp, _ = optimize_gp_hyperparams(
        X=self.X, y=self.y1, basis=self.basis,
        featurizer_recovery=self.featurizer_recovery,
        param_mean=self.b, param_cov=self.B,
        noise_var=self.noise_var, cov_main=self.cov)

    # Compare against a central-difference approximation.
    n = len(x0)
    kp = x0
    eps = 1e-6
    empirical_grad = np.zeros(n)
    for i in range(n):
        kp[i] -= eps
        gp1 = build_gp(kp, compute_ll=True)
        l1 = gp1.log_likelihood()
        kp[i] += 2 * eps
        gp2 = build_gp(kp, compute_ll=True)
        l2 = gp2.log_likelihood()
        kp[i] -= eps
        empirical_grad[i] = (l2 - l1) / (2 * eps)

    self.assertTrue((np.abs(grad - empirical_grad) < 0.001).all())
def test_gradient(self):
    # CSFIC model: compactly supported main kernel plus an FIC component with
    # inducing points Xu.
    cov_main = GPCov(wfn_params=[.5], dfn_params=[2.5],
                     wfn_str="compact2", dfn_str="euclidean")
    cov_fic = GPCov(wfn_params=[1.2], dfn_params=[1.5],
                    wfn_str="se", dfn_str="euclidean", Xu=self.u)
    noise_var = 1.0

    gp = GP(X=self.X, y=self.y1, noise_var=noise_var, cov_main=cov_main,
            cov_fic=cov_fic, compute_ll=True, compute_grad=True,
            compute_xu_grad=True, sparse_threshold=0, build_tree=False,
            sparse_invert=True)
    grad = gp.ll_grad

    nllgrad, x0, bounds, build_gp, _ = optimize_gp_hyperparams(
        X=self.X, y=self.y1, noise_var=noise_var, cov_main=cov_main,
        cov_fic=cov_fic, sparse_threshold=0)

    # Compare against a central-difference approximation.
    n = len(x0)
    kp = x0
    eps = 1e-6
    empirical_grad = np.zeros(n)
    for i in range(n):
        kp[i] -= eps
        gp1 = build_gp(kp, compute_ll=True)
        l1 = gp1.log_likelihood()
        kp[i] += 2 * eps
        gp2 = build_gp(kp, compute_ll=True)
        l2 = gp2.log_likelihood()
        kp[i] -= eps
        empirical_grad[i] = (l2 - l1) / (2 * eps)

    self.assertTrue((np.abs(grad - empirical_grad) < 1e-6).all())
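# --- Hedged usage sketch (not part of the original module) -------------------
# The CSFIC construction tested above (compactly supported main kernel plus an
# FIC component defined by inducing points Xu) can be handed to
# train_hyperparams directly, optionally optimizing the inducing-point
# locations. Parameter values are copied from the test; _LogNormalPrior is the
# illustrative stand-in defined earlier.

def _example_train_csfic(X, y, Xu):
    cov_main = GPCov(wfn_params=[0.5], dfn_params=[2.5],
                     wfn_str="compact2", dfn_str="euclidean")
    cov_fic = GPCov(wfn_params=[1.2], dfn_params=[1.5],
                    wfn_str="se", dfn_str="euclidean", Xu=Xu)
    return train_hyperparams(X, y, cov_main=cov_main, cov_fic=cov_fic,
                             noise_var=1.0, noise_prior=_LogNormalPrior(),
                             optimize_xu=True, sparse_invert=True)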