def __init__(self, X, y, n_importance, prior_log_pdf, ridge=0., num_shogun_threads=1):
    """GP binary classification model backed by Shogun.

    Parameters:
    X                  -- data matrix, one row per observation (transposed below,
                          since Shogun's RealFeatures expects column-major data)
    y                  -- binary labels, wrapped in sg.BinaryLabels
    n_importance       -- number of importance samples (stored only here)
    prior_log_pdf      -- log-density of the prior over hyper-parameters (stored only here)
    ridge              -- ridge regularisation constant (stored only here)
    num_shogun_threads -- number of threads Shogun is told to use
    """
    self.n_importance = n_importance
    self.prior_log_pdf = prior_log_pdf
    self.ridge = ridge
    self.X = X
    self.y = y
    self.num_shogun_threads = num_shogun_threads
    # restrict the number of threads shogun uses
    # NOTE(review): set_num_threads is called on a temporary ZeroMean object --
    # presumably the setting is global within Shogun; confirm
    logger.debug("Using Shogun with %d threads" % self.num_shogun_threads)
    sg.ZeroMean().parallel.set_num_threads(self.num_shogun_threads)
    # shogun representation of data
    self.sg_labels = sg.BinaryLabels(self.y)
    self.sg_feats_train = sg.RealFeatures(self.X.T)
    # ARD: theta, which is in log-scale, is later set as kernel weights
    # NOTE(review): meaning of GaussianARDKernel(10, 1) arguments not visible
    # here -- presumably cache size and initial width; confirm against Shogun docs
    self.sg_kernel = sg.GaussianARDKernel(10, 1)
    self.sg_mean = sg.ZeroMean()
    self.sg_likelihood = sg.LogitLikelihood()
def propose(self, current, current_log_pdf, samples, accepted):
    """Propose a new state by simulating Hamiltonian dynamics from `current`.

    Leapfrog parameters (momentum, number of steps, step size) are drawn from a
    private random stream (self.hmc_rnd_state) so the chain's global numpy
    stream is left untouched.

    Returns (q, acc_prob, log_pdf_q): the trajectory end point, its acceptance
    probability, and its target log-pdf.
    """
    # random variables from a fixed random stream without modifying the current one
    rnd_state = np.random.get_state()
    np.random.set_state(self.hmc_rnd_state)
    # sample momentum and leapfrog parameters
    p0 = self.momentum.sample()
    p0_log_pdf = self.momentum.log_pdf(p0)
    # number of steps uniform in [num_steps_min, num_steps_max],
    # step size uniform in [step_size_min, step_size_max]
    num_steps = np.random.randint(self.num_steps_min, self.num_steps_max + 1)
    step_size = np.random.rand() * (self.step_size_max - self.step_size_min) + self.step_size_min
    # restore random state
    self.hmc_rnd_state = np.random.get_state()
    np.random.set_state(rnd_state)
    logger.debug("Simulating Hamiltonian flow")
    q, p = self.integrator(current, self.target.grad, p0, self.momentum.grad, step_size, num_steps)
    # compute acceptance probability, extracting log_pdf of q
    p_log_pdf = self.momentum.log_pdf(p)
    # use a function call to be able to overload it for KMC
    acc_prob, log_pdf_q = self.accept_prob_log_pdf(current, q, p0_log_pdf, p_log_pdf, current_log_pdf)
    return q, acc_prob, log_pdf_q
def leapfrog_no_storing(q, dlogq, p, dlogp, step_size=0.3, num_steps=1):
    """Leapfrog integration of Hamiltonian dynamics, keeping only the end point.

    Parameters:
    q, p      -- initial position and momentum (arrays; copied, not modified)
    dlogq     -- gradient of the target log-density (negative potential gradient)
    dlogp     -- gradient of the momentum log-density (negative kinetic gradient)
    step_size -- leapfrog step size
    num_steps -- number of full position updates

    Returns (q, p) at the end of the trajectory.
    """
    logger.debug("Entering")
    # work on copies so the caller's arrays stay untouched
    q_cur = np.array(q.copy())
    p_cur = np.array(p.copy())
    half = step_size / 2
    # initial momentum half-step
    p_cur = p_cur + half * dlogq(q_cur)
    for step in range(num_steps):
        # full position update; -dlogp(p) plays the role of the velocity
        q_cur = q_cur - step_size * dlogp(p_cur)
        # gradient is reused for the two momentum half-steps
        grad_q = dlogq(q_cur)
        # first half of the momentum update
        p_cur = p_cur + half * grad_q
        # second half is skipped on the final iteration so the trajectory
        # ends after a half-step, as leapfrog requires
        if step < num_steps - 1:
            p_cur = p_cur + half * grad_q
    logger.debug("Leaving")
    return q_cur, p_cur
def leapfrog_friction_habc_no_storing(c, V, q, dlogq, p, dlogp, step_size=0.3, num_steps=1):
    """Leapfrog-style integrator with friction and injected noise (SG-HMC /
    HABC style), keeping only the end point.

    c         -- scalar friction coefficient (C = c*I + V below)
    V         -- gradient-noise estimate added to the friction matrix
    q, p      -- initial position and momentum (copied, not modified)
    dlogq     -- gradient of the target log-density
    dlogp     -- gradient of the momentum log-density
    step_size -- integration step size
    num_steps -- number of update iterations

    Returns (q, p) at the end of the trajectory.

    MATLAB reference by Chen:
    function [ newx ] = sghmc( U, gradU, m, dt, nstep, x, C, V )
    %% SGHMC using gradU, for nstep, starting at position x
    p = randn( size(x) ) * sqrt( m );
    B = 0.5 * V * dt;
    D = sqrt( 2 * (C-B) * dt );
    for i = 1 : nstep
        p = p - gradU( x ) * dt - p * C * dt + randn(1)*D;
        x = x + p./m * dt;
    end
    newx = x;
    end
    """
    logger.debug("Entering")
    # friction term (as in HABC)
    D = len(q)
    # NOTE(review): if V is scalar, C - B subtracts B from every entry of the
    # matrix C rather than only the diagonal -- confirm intended, cf. the
    # MATLAB reference above where the same expression appears
    B = 0.5 * V * step_size
    C = np.eye(D) * c + V
    # Cholesky factor of the noise covariance 2*step_size*(C - B)
    L_friction = np.linalg.cholesky(2 * step_size * (C - B))
    zeros_D = np.zeros(D)
    # create copy of state
    p = np.array(p.copy())
    q = np.array(q.copy())
    # alternate full momentum and variable updates
    for _ in range(num_steps):
        friction = sample_gaussian(N=1, mu=zeros_D, Sigma=L_friction, is_cholesky=True)[0]
        # just like normal momentum update but with friction;
        # -dlogp(p) plays the role of the velocity p./m in the reference
        p = p - step_size * -dlogq(q) - step_size * C.dot(-dlogp(p)) + friction
        # normal position update
        q = q + step_size * -dlogp(p)
    logger.debug("Leaving")
    return q, p
def compute_b(X, omega, u):
    """Compute the b vector of the random-feature score matching objective.

    X     -- (N, D) data matrix
    omega -- (D, m) random frequency matrix
    u     -- length-m phase vector (scalar for m=1)

    Returns a length-m vector accumulated over the D input dimensions.
    """
    assert len(X.shape) == 2
    # number of random features
    num_features = 1 if np.isscalar(u) else len(u)
    num_dims = X.shape[1]
    Phi2 = feature_map(X, omega, u)
    accumulated = np.zeros(num_features)
    for dim in range(num_dims):
        # progress reporting every tenth dimension
        if dim % 10 == 0:
            logger.debug("Dimension %d/%d" % (dim, num_dims))
        accumulated += np.mean(-Phi2 * (omega[dim, :]**2), 0)
    return -accumulated
def xvalidate(Z, lmbda, omega, u, n_folds=5, num_repetitions=1):
    """K-fold cross-validation of the score-matching objective.

    Z               -- data matrix
    lmbda           -- regularisation constant passed to score_matching_sym
    omega, u        -- random basis (frequencies and phases)
    n_folds         -- number of cross-validation folds
    num_repetitions -- number of independent shuffled repetitions

    Returns the per-fold objective values averaged over repetitions
    (length n_folds).
    """
    Js = np.zeros((num_repetitions, n_folds))
    for j in range(num_repetitions):
        # NOTE(review): this is the old sklearn KFold API (constructed from the
        # dataset size with an n_folds keyword); sklearn >= 0.18 changed this
        # interface -- confirm the pinned sklearn version
        kf = KFold(len(Z), n_folds=n_folds, shuffle=True)
        for i, (train, test) in enumerate(kf):
            logger.debug("xvalidation fold %d/%d" % (i + 1, len(kf)))
            # train
            theta = score_matching_sym(Z[train], lmbda, omega, u)
            # evaluate
            Js[j, i] = objective(Z[test], theta, lmbda, omega, u)
    return np.mean(Js, 0)
def compute(self):
    """Run the trajectory simulation and submit the result to the aggregator.

    Records the current numpy random state, delegates the actual work to
    compute_trajectory, wraps its six return values (together with the
    problem dimensions D and N) into a TrajectoryJobResult, and submits it.
    """
    logger.debug("Entering")
    start_state = np.random.get_state()
    # returns (acc_mean, acc_est_mean, log_det, log_det_est, steps_taken,
    # random_start_state) -- exactly the trailing constructor arguments below
    trajectory_stats = self.compute_trajectory(start_state)
    logger.info("Submitting results to aggregator")
    self.aggregator.submit_result(TrajectoryJobResult(self.D, self.N, *trajectory_stats))
    logger.debug("Leaving")
def multicore_fun(log2_sigma, log2_lmbda, num_repetitions, num_folds, Z, m):
    """Evaluate the cross-validated objective for one CMA particle.

    log2_sigma, log2_lmbda -- kernel width and regulariser on a log2 scale
    num_repetitions        -- number of independent basis resamplings
    num_folds              -- folds per cross-validation run
    Z                      -- data matrix
    m                      -- number of random basis functions

    Returns the objective J averaged over repetitions and folds.
    """
    D = Z.shape[1]
    # the optimiser works on a log2 scale; transform back
    lmbda = 2**log2_lmbda
    sigma = 2**log2_sigma
    gamma = 0.5 * (sigma**2)
    scores = np.zeros(num_repetitions)
    for rep in range(num_repetitions):
        logger.debug("xvalidation repetition %d/%d" % (rep + 1, num_repetitions))
        # fresh random basis per repetition
        omega, u = sample_basis(D, m, gamma)
        scores[rep] = np.mean(xvalidate(Z, lmbda, omega, u, num_folds, num_repetitions=1))
    result = np.mean(scores)
    logger.info("cma particle, sigma: %.2f, lambda: %.6f, J=%.4f" % \
                (sigma, lmbda, result))
    return result
def compute_C(X, omega, u):
    """Compute the C matrix of the random-feature score matching objective.

    X     -- (N, D) data matrix
    omega -- (D, m) random frequency matrix
    u     -- length-m phase vector (scalar for m=1)

    Returns the (m, m) matrix (1/N) * sum_d temp_d^T temp_d, where temp_d is
    the derivative of the feature map w.r.t. input dimension d.
    """
    assert len(X.shape) == 2
    m = 1 if np.isscalar(u) else len(u)
    N = X.shape[0]
    D = X.shape[1]
    C = np.zeros((m, m))
    # feature map (up to sign): -sqrt(2/m) * sin(X omega + u), built in place
    projection = np.dot(X, omega) + u
    np.sin(projection, projection)
    projection *= -np.sqrt(2. / m)
    # BUG FIX: the original pre-allocated temp = np.zeros((N, m)) here and then
    # rebound the name on every loop iteration -- a dead allocation, removed.
    for d in range(D):
        if d % 10 == 0:
            logger.debug("Dimension %d/%d" % (d, D))
        # derivative of the feature map w.r.t. dimension d
        temp = -projection * omega[d, :]
        # accumulate temp^T temp over the N rows
        C += np.tensordot(temp, temp, [0, 0])
    return C / N
def log_pdf(self, theta):
    """ABC-style log-posterior: simulated likelihood times prior.

    Simulates n_lik_samples pseudo datasets at theta, scores each against the
    observed data with a Gaussian epsilon kernel on the Euclidean distance,
    averages the kernel values, and adds the prior log-density.

    Returns -inf if the averaged likelihood produces a NaN log value.
    """
    logger.debug("Simulating datasets")
    log_liks = np.zeros(self.n_lik_samples)
    for i in range(self.n_lik_samples):
        # summary statistic: mean
        pseudo_data = self.simulator(theta)
        discrepancy = np.linalg.norm(pseudo_data - self.data)
        # Gaussian epsilon kernel in log-space
        log_liks[i] = -0.5 * (discrepancy**2) / self.epsilon**2
    lik = np.mean(np.exp(log_liks))
    logger.debug("Likelihood: %.2f", lik)
    result = np.log(lik) + self.prior.log_pdf(theta)
    return -np.inf if np.isnan(result) else result
def _update(self, theta): logger.debug("Entering") state_hash = hashlib.sha1(str(self.fixed_rnd_state)).hexdigest() logger.debug("Simulating using rnd_state %s" % state_hash) D = self.abc_target.D # sample pseudo data to fit conditional model pseudo_datas = np.zeros((self.abc_target.n_lik_samples, D)) # sticky random numbers: fixed seed to simulate data current_state = np.random.get_state() np.random.set_state(self.fixed_rnd_state) for i in range(len(pseudo_datas)): pseudo_datas[i] = self.abc_target.simulator(theta) np.random.set_state(current_state) # fit Gaussian, add ridge on diagonal for the epsilon likelihood kernel self.mu = np.mean(pseudo_datas, 0) Sigma = np.cov( pseudo_datas.T) + np.eye(D) * (self.abc_target.epsilon**2) self.L = np.linalg.cholesky(Sigma) # logger.debug("Simulation") # logger.debug("Theta: %s" % str(theta[:3])) # logger.debug("Mean: %s" % str(self.mu[:3])) logger.debug("Entering")
def compute_C_memory(X, omega, u):
    """Compute the C matrix of the score matching objective from the full
    derivative tensor of the feature map.

    X     -- (N, D) data matrix
    omega -- random frequency matrix
    u     -- phase vector

    Returns the (m, m) matrix (1/N) * Phi2_r^T Phi2_r, where Phi2_r is the
    (N*D, m) reshaped feature-map derivative tensor.

    NOTE: of the alternatives benchmarked in the original (a double loop over
    np.outer, np.einsum('ij,ik->jk', ...), and a cython outer_sum), the
    np.tensordot over the reshaped array was fastest; only that implementation
    is kept and the dead benchmark code was removed.
    """
    assert len(X.shape) == 2
    logger.debug("Computing derivatives")
    Phi2 = feature_map_derivatives(X, omega, u)
    d = X.shape[1]
    N = X.shape[0]
    m = Phi2.shape[2]
    logger.debug("Computing derivative covariance")
    # sum of outer products over all N*d derivative rows in one C-level call
    Phi2_reshaped = Phi2.reshape(N * d, m)
    C4 = np.tensordot(Phi2_reshaped, Phi2_reshaped, [0, 0])
    return C4 / N
def score_matching_sym(X, lmbda, omega, u, b=None, C=None):
    """Fit score-matching coefficients theta = (C + lmbda*I)^{-1} b.

    X        -- data matrix, used only when b or C must be computed
    lmbda    -- ridge regularisation constant
    omega, u -- random basis passed through to compute_b / compute_C
    b, C     -- optional precomputed statistics; computed from X when None

    Returns the solution vector theta of the regularised linear system.
    """
    logger.debug("Computing b")
    b = compute_b(X, omega, u) if b is None else b
    logger.debug("Computing C")
    C = compute_C(X, omega, u) if C is None else C
    logger.debug("Linear solve")
    regularised = C + lmbda * np.eye(len(C))
    return np.linalg.solve(regularised, b)
def grad(self, theta): logger.debug("Entering") # update likelihood term self._update(theta) log_lik = lambda theta: log_gaussian_pdf( theta, self.mu, self.L, is_cholesky=True) # logger.debug("Computing SPSA gradient") grad_lik_est = SPSA(log_lik, theta, stepsize=5., num_repeats=self.num_spsa_repeats) grad_prior = self.abc_target.prior.grad(theta) # update online covariance matrix estimate self.grad_cov_est_n += 1 delta = grad_lik_est - self.grad_cov_est_mean self.grad_cov_est_mean += delta / self.grad_cov_est_n self.grad_cov_est_M2 += np.outer(delta, grad_lik_est - self.grad_cov_est_mean) if self.grad_cov_est_n > 1: self.grad_cov_est = self.grad_cov_est_M2 / (self.grad_cov_est_n - 1) logger.debug("Variance grad_0: %.4f" % self.grad_cov_est[0, 0]) # logger.debug("grad_lik_est: %s" % str(grad_lik_est)) # logger.debug("grad_prior: %s" % str(grad_prior)) # logger.debug("||grad_lik_est||: %.2f" % np.linalg.norm(grad_lik_est)) # logger.debug("||grad_prior||: %.2f" % np.linalg.norm(grad_prior)) # logger.debug("||grad_lik_est-grad_prior||: %.2f" % np.linalg.norm(grad_lik_est-grad_prior)) logger.debug("Leaving") return grad_lik_est + grad_prior
def compute_trajectory(self, random_start_state=None):
    """Simulate one Hamiltonian trajectory under the exact target gradient and
    under a kernel-based gradient estimate, and compare the two.

    random_start_state -- optional numpy random state to resume from; when
                          None, the current global state is recorded and
                          returned so the run can be reproduced.

    Returns (acc_mean, acc_est_mean, log_det, log_det_est, steps_taken,
    random_start_state).
    """
    logger.debug("Entering")
    if random_start_state is not None:
        np.random.set_state(random_start_state)
    else:
        random_start_state = np.random.get_state()
    # Gaussian momentum with covariance sigma_p * I (Cholesky factor L_p)
    L_p = np.linalg.cholesky(np.eye(self.D) * self.sigma_p)
    self.logp = lambda x: log_gaussian_pdf(x, Sigma=L_p, compute_grad=False, is_cholesky=True)
    self.dlogp = lambda x: log_gaussian_pdf(x, Sigma=L_p, compute_grad=True, is_cholesky=True)
    # BUG FIX: this lambda was assigned twice in a row; duplicate removed
    self.p_sample = lambda: sample_gaussian(N=1, mu=np.zeros(self.D), Sigma=L_p, is_cholesky=True)[0]
    # set up target and momentum densities and gradients
    self.set_up()
    logger.info("Learning kernel bandwidth")
    sigma = select_sigma_grid(self.Z, lmbda=self.lmbda, log2_sigma_max=15)
    logger.info("Using lmbda=%.2f, sigma: %.2f" % (self.lmbda, sigma))
    logger.info("Computing kernel matrix")
    K = gaussian_kernel(self.Z, sigma=sigma)
    logger.info("Estimate density in RKHS")
    b = _compute_b_sym(self.Z, K, sigma)
    C = _compute_C_sym(self.Z, K, sigma)
    # NOTE(review): argument order (Z, sigma, lmbda, K, ...) differs from the
    # random-feature score_matching_sym(X, lmbda, omega, u, ...) elsewhere in
    # this project -- presumably this resolves to the kernel-based variant;
    # verify which symbol is imported here
    a = score_matching_sym(self.Z, sigma, self.lmbda, K, b, C)
    # gradient of the estimated log-density in the RKHS
    kernel_grad = lambda x, X=None: gaussian_kernel_grad(x, X, sigma)
    dlogq_est = lambda x: log_pdf_estimate_grad(x, a, self.Z, kernel_grad)
    logger.info("Simulating trajectory for L=%d steps of size %.2f" % \
                (self.num_steps, self.step_size))
    # starting state
    p0 = self.p_sample()
    q0 = self.q_sample()
    Qs, Ps = leapfrog(q0, self.dlogq, p0, self.dlogp, self.step_size, self.num_steps, self.max_steps)
    # run second integrator for same amount of steps
    steps_taken = len(Qs)
    logger.info("%d steps taken" % steps_taken)
    Qs_est, Ps_est = leapfrog(q0, dlogq_est, p0, self.dlogp, self.step_size, steps_taken)
    logger.info("Computing average acceptance probabilities")
    log_acc = compute_log_accept_pr(q0, p0, Qs, Ps, self.logq, self.logp)
    log_acc_est = compute_log_accept_pr(q0, p0, Qs_est, Ps_est, self.logq, self.logp)
    acc_mean = np.mean(np.exp(log_acc))
    acc_est_mean = np.mean(np.exp(log_acc_est))
    logger.info("Computing average volumes")
    log_det = compute_log_det_trajectory(Qs, Ps)
    log_det_est = compute_log_det_trajectory(Qs_est, Ps_est)
    logger.info("Average acceptance prob: %.2f, %.2f" % (acc_mean, acc_est_mean))
    logger.info("Log-determinant: %.2f, %.2f" % (log_det, log_det_est))
    logger.debug("Leaving")
    return acc_mean, acc_est_mean, log_det, log_det_est, steps_taken, random_start_state
def propose(self, current, current_log_pdf, samples, accepted):
    """Propose a new state by simulating Hamiltonian dynamics, keeping the
    full trajectory for diagnostics.

    Leapfrog randomness is drawn from a private stream (self.hmc_rnd_state)
    so the chain's global numpy stream is unaffected.

    Returns (q, acc_prob, log_pdf_q) for the trajectory end point.
    """
    # random variables from a fixed random stream without modifying the current one
    rnd_state = np.random.get_state()
    np.random.set_state(self.hmc_rnd_state)
    if current_log_pdf is None:
        current_log_pdf = self.orig_target.log_pdf(current)
    # sample momentum and leapfrog parameters
    p0 = self.momentum.sample()
    num_steps = np.random.randint(self.num_steps_min, self.num_steps_max + 1)
    step_size = np.random.rand() * (self.step_size_max - self.step_size_min) + self.step_size_min
    # restore random state
    self.hmc_rnd_state = np.random.get_state()
    np.random.set_state(rnd_state)
    logger.debug("Simulating Hamiltonian flow")
    Qs, Ps = leapfrog(current, self.target.grad, p0, self.momentum.grad, step_size, num_steps)
    q = Qs[-1]
    p = Ps[-1]
    logger.debug("Momentum start: %s" % str(p0))
    logger.debug("Momentum end: %s" % str(p))
    # compute acceptance probability, extracting log_pdf of q
    p0_log_pdf = self.momentum.log_pdf(p0)
    p_log_pdf = self.momentum.log_pdf(p)
    # use a function call to be able to overload it for KMC
    acc_prob, log_pdf_q = self.accept_prob_log_pdf(current, q, p0_log_pdf, p_log_pdf, current_log_pdf, samples)
    # BUG FIX: the diagnostic plotting below was gated by "if True and ...",
    # which opened a *blocking* matplotlib window (plt.show()) every 100
    # iterations; it is now opt-in via a plot_diagnostics attribute, and the
    # large body of commented-out / unused diagnostic code was removed.
    if getattr(self, "plot_diagnostics", False) and (len(samples) % 100) == 0:
        self._plot_proposal_diagnostics(current, samples, Qs)
    return q, acc_prob, log_pdf_q

def _plot_proposal_diagnostics(self, current, samples, Qs):
    """Debugging aid: plot the estimated gradient field over two dimensions
    together with the training data, samples so far, and the trajectory."""
    logger.debug("Plotting")
    import matplotlib.pyplot as plt
    res = 50
    Xs_q = np.linspace(-4, 4, res)
    Ys_q = np.linspace(-4, 4, res)
    # project the gradient onto dimensions D1, D2, holding the rest at current
    D1 = 0
    D2 = 1

    def dummy_grad(X_2d):
        theta = current.copy()
        theta[D1] = X_2d[0]
        theta[D2] = X_2d[1]
        return self.target.grad(theta)

    plt.figure()
    G_norm, U_q, V, X, Y = evaluate_gradient_grid(Xs_q, Ys_q, dummy_grad)
    plot_array(Xs_q, Ys_q, G_norm)
    plt.plot(self.Z[:, D1], self.Z[:, D2], '.')
    plt.plot(Qs[:, D1], Qs[:, D2], 'r-')
    plt.plot(samples[:, D1], samples[:, D2], 'm-')
    plt.plot(current[D1], current[D2], 'b*', markersize=15)
    plt.plot(Qs[-1, D1], Qs[-1, D2], 'r*', markersize=15)
    plt.quiver(X, Y, U_q, V, color='m')
    plt.show()
def accept_prob_log_pdf(self, current, q, p0_log_pdf, p_log_pdf, current_log_pdf=None):
    """Metropolis-Hastings acceptance probability for a Hamiltonian proposal.

    current, q                -- start and end points of the trajectory
    p0_log_pdf, p_log_pdf     -- momentum log-densities at start and end
    current_log_pdf           -- target log-pdf at current; recomputed when None

    Returns (acc_prob, log_pdf_q): min(1, exp(H0 - H)) and the target
    log-pdf at q.
    """
    # potentially re-use log_pdf of last accepted state
    if current_log_pdf is None:
        current_log_pdf = self.target.log_pdf(current)
    log_pdf_q = self.target.log_pdf(q)
    # total energies (negative log joint) at both ends of the trajectory
    energy_start = -current_log_pdf - p0_log_pdf
    energy_end = -log_pdf_q - p_log_pdf
    log_ratio = -energy_end + energy_start
    acceptance = np.exp(np.minimum(0., log_ratio))
    logger.debug("log_pdf current=%.2f" % current_log_pdf)
    logger.debug("log_pdf q=%.2f" % log_pdf_q)
    logger.debug("difference_q=%.2f" % (log_pdf_q - current_log_pdf))
    logger.debug("difference_p=%.2f" % (p_log_pdf - p0_log_pdf))
    logger.debug("H0=%.2f" % energy_start)
    logger.debug("H=%.2f" % energy_end)
    logger.debug("H0-H=%.2f" % log_ratio)
    return acceptance, log_pdf_q
def compute_trajectory(self, random_start_state=None):
    """Simulate one Hamiltonian trajectory under the exact gradient and under
    the current density estimate's gradient, and compare the two.

    random_start_state -- optional numpy random state to resume from; when
                          None, the current global state is recorded and
                          returned so the run can be reproduced.

    Returns (acc_mean, acc_est_mean, log_det, log_det_est, steps_taken,
    random_start_state).
    """
    logger.debug("Entering")
    if random_start_state is not None:
        np.random.set_state(random_start_state)
    else:
        random_start_state = np.random.get_state()
    # momentum: Gaussian with covariance sigma_p * I (Cholesky factor L_p)
    L_p = np.linalg.cholesky(np.eye(self.D) * self.sigma_p)
    self.logp = lambda x: log_gaussian_pdf(x, Sigma=L_p, compute_grad=False, is_cholesky=True)
    self.dlogp = lambda x: log_gaussian_pdf(x, Sigma=L_p, compute_grad=True, is_cholesky=True)
    self.p_sample = lambda: sample_gaussian(N=1, mu=np.zeros(self.D), Sigma=L_p, is_cholesky=True)[0]
    # set up target and momentum densities and gradients
    self.set_up()
    dlogq_est = self.update_density_estimate()
    # random number of steps?
    if self.max_steps is not None:
        steps = np.random.randint(self.num_steps, self.max_steps + 1)
    else:
        steps = self.num_steps
    logger.info("Simulating trajectory for at least L=%d steps of size %.2f" % \
                (self.num_steps, self.step_size))
    # starting state
    p0 = self.p_sample()
    q0 = self.q_sample()
    Qs, Ps = leapfrog(q0, self.dlogq, p0, self.dlogp, self.step_size, steps)
    # run second integrator for same amount of steps
    steps_taken = len(Qs)
    Qs_est, Ps_est = leapfrog(q0, dlogq_est, p0, self.dlogp, self.step_size, steps_taken)
    logger.info("%d steps taken" % steps_taken)
    logger.info("Computing average acceptance probabilities")
    log_acc = compute_log_accept_pr(q0, p0, Qs, Ps, self.logq, self.logp)
    log_acc_est = compute_log_accept_pr(q0, p0, Qs_est, Ps_est, self.logq, self.logp)
    acc_mean = np.mean(np.exp(log_acc))
    acc_est_mean = np.mean(np.exp(log_acc_est))
    # acceptance over the final 10% of the trajectory only
    # NOTE(review): acc_mean10 / acc_est_mean10 are logged but not returned --
    # presumably intentional; confirm callers do not need them
    idx09 = int(len(log_acc) * 0.9)
    acc_mean10 = np.mean(np.exp(log_acc[idx09:]))
    acc_est_mean10 = np.mean(np.exp(log_acc_est[idx09:]))
    logger.info("Computing average volumes")
    log_det = compute_log_det_trajectory(Qs, Ps)
    log_det_est = compute_log_det_trajectory(Qs_est, Ps_est)
    logger.info("Average acceptance prob: %.2f, %.2f" % (acc_mean, acc_est_mean))
    logger.info("Average acceptance prob (last 10 percent): %.2f, %.2f" % (acc_mean10, acc_est_mean10))
    logger.info("Log-determinant: %.2f, %.2f" % (log_det, log_det_est))
    logger.debug("Leaving")
    return acc_mean, acc_est_mean, log_det, log_det_est, steps_taken, random_start_state
def compute(self):
    """Run the MCMC chain for num_iterations steps and submit results.

    Fills self.samples / self.proposals / self.accepted / self.acc_prob /
    self.log_pdf (nan-initialised so unfinished entries are visible), tracks
    the running acceptance rate, computes the configured posterior statistics
    on the warmed-up, thinned chain, and submits everything to the aggregator.
    """
    # set up target if possible
    self.target.set_up()
    # remember set up time
    start_time = time()
    self.set_up()
    self.time_taken_set_up = time() - start_time
    # sampling time
    start_time = time()
    self.samples = np.zeros((self.num_iterations, self.D)) + np.nan
    self.proposals = np.zeros((self.num_iterations, self.D)) + np.nan
    self.accepted = np.zeros(self.num_iterations) + np.nan
    self.acc_prob = np.zeros(self.num_iterations) + np.nan
    self.log_pdf = np.zeros(self.num_iterations) + np.nan
    current = self.start
    current_log_pdf = None
    logger.info("Starting MCMC in D=%d dimensions" % self.D)
    # BUG FIX: integer interval with a floor of 1 -- the original used
    # num_iterations / 10, which is a float modulo on Python 3 and a
    # ZeroDivisionError for chains shorter than 10 iterations
    progress_interval = max(self.num_iterations // 10, 1)
    for i in range(self.num_iterations):
        # BUG FIX: the original compared self.log_pdf[i - 1] against None,
        # which never matches a float array entry, and read index -1 on the
        # first iteration; report nan before any sample is stored instead
        last_log_pdf = self.log_pdf[i - 1] if i > 0 else np.nan
        # print chain progress
        log_str = "MCMC iteration %d/%d, current log_pdf: %.6f, avg acceptance=%.3f" % (
            i + 1, self.num_iterations, last_log_pdf, self.avg_accept)
        if ((i + 1) % progress_interval) == 0:
            logger.info(log_str)
        else:
            logger.debug(log_str)
        # marginal sampler: do not re-use recompute log-pdf
        if self.recompute_log_pdf:
            current_log_pdf = None
        # generate proposal and acceptance probability
        logger.debug("Performing MCMC step")
        self.proposals[i], self.acc_prob[i], log_pdf_proposal = self.propose(
            current, current_log_pdf, self.samples[:i], self.avg_accept)
        # accept-reject
        r = np.random.rand()
        self.accepted[i] = r < self.acc_prob[i]
        logger.debug("Proposed %s" % str(self.proposals[i]))
        logger.debug("Acceptance prob %.4f" % self.acc_prob[i])
        logger.debug("Accepted: %d" % self.accepted[i])
        # update running mean according to knuth's stable formula
        self.avg_accept += (self.accepted[i] - self.avg_accept) / (i + 1)
        # update state
        logger.debug("Updating chain")
        if self.accepted[i]:
            current = self.proposals[i]
            current_log_pdf = log_pdf_proposal
        # store sample; guard against assigning None into the float array
        # (current_log_pdf stays None while every step has been rejected)
        self.samples[i] = current
        self.log_pdf[i] = np.nan if current_log_pdf is None else current_log_pdf
    self.time_taken_sampling = time() - start_time
    logger.info("Computing %d posterior statistics" % len(self.statistics))
    self.posterior_statistics = {}
    for (k, v) in self.statistics.items():
        logger.info("Computing posterior statistic %s using num_warmup=%d, thin=%d" \
                    % (k, self.num_warmup, self.thin_step))
        # discard warm-up, then thin
        inds = np.arange(self.num_warmup, len(self.samples), step=self.thin_step)
        self.posterior_statistics[k] = v(self.samples[inds])
    logger.info("Submitting results to aggregator")
    self.submit_to_aggregator()