def moment_match_unstable(self, y, cav_mean, cav_cov, hyp=None, power=1.0, cubature_func=None):
    """
    Moment matching that keeps the full Hessian of logZₙ, cross terms included.

    TODO: retaining the cross terms yields a full site covariance, but makes things unstable.

    :param y: observed data (yₙ)
    :param cav_mean: cavity mean (mₙ)
    :param cav_cov: cavity covariance (cₙ); only its diagonal is handed to the likelihood terms
    :param hyp: likelihood hyperparameter (not used by this method)
    :param power: EP power / fraction (a)
    :param cubature_func: function mapping a dimension to sigma points and weights
    :return:
        lZ: the log partition function, logZₙ
        site_mean: approximate likelihood (site) mean
        site_cov: approximate likelihood (site) covariance
    """
    # one-dimensional cubature rule; 20-point Gauss-Hermite unless told otherwise
    x, w = gauss_hermite(1, 20) if cubature_func is None else cubature_func(1)

    # the same argument list feeds logZ, its gradient and its Jacobian
    lik_args = (y, x, w, np.squeeze(cav_mean), np.squeeze(np.diag(cav_cov)), power)
    lZ = self.log_expected_likelihood(*lik_args)
    dlZ = self.dlZ_dm(*lik_args)[:, None]
    # differentiate the gradient w.r.t. positional argument 3 (the squeezed cavity mean)
    d2lZ = jacrev(self.dlZ_dm, argnums=3)(*lik_args)
    # NOTE: the cross terms could be discarded here via np.diag(np.diag(d2lZ))

    jitter = 1e-10 * np.eye(d2lZ.shape[0])
    id2lZ = inv(ensure_positive_precision(-d2lZ) - jitter)
    # approx. likelihood (site) mean and covariance (see Rasmussen & Williams p75)
    site_mean = cav_mean + id2lZ @ dlZ
    site_cov = power * (-cav_cov + id2lZ)
    return lZ, site_mean, site_cov
def moment_match(self, y, cav_mean, cav_cov, hyp=None, power=1.0, cubature_func=None):
    """
    Moment matching for a latent state whose first half holds the subband components and
    whose second half holds the modulator components.

    The subbands are integrated out in closed form (only the diagonal of their covariance is
    used), the modulators via cubature. Cross-covariances between the two halves are ignored
    and the resulting Hessian of logZₙ is diagonal.

    :param y: observed data (yₙ)
    :param cav_mean: cavity mean, subbands stacked on top of modulators
    :param cav_cov: cavity covariance, partitioned the same way as cav_mean
    :param hyp: enters as the observation noise variance; must not be None
    :param power: EP power / fraction (a)
    :param cubature_func: function mapping a dimension to sigma points and weights
    :return:
        lZ: the log partition function, logZₙ
        site_mean: approximate likelihood (site) mean
        site_cov: approximate likelihood (site) covariance
    """
    num_components = int(cav_mean.shape[0] / 2)
    if cubature_func is None:
        x, w = gauss_hermite(num_components, 20)  # Gauss-Hermite sigma points and weights
    else:
        x, w = cubature_func(num_components)

    subband_mean = cav_mean[:num_components]
    modulator_mean = self.link_fn(cav_mean[num_components:])
    subband_var = np.diag(cav_cov[:num_components, :num_components])[..., None]
    modulator_cov = cav_cov[num_components:, num_components:]
    modulator_prec = np.diag(modulator_cov)[..., None]**-1

    sigma_points = cholesky(modulator_cov) @ x + modulator_mean
    gain = self.link_fn(sigma_points)  # link function evaluated once at every sigma point
    centred = sigma_points - modulator_mean

    # Gaussian density of y at each sigma point, with the subbands marginalised
    const = power**-0.5 * (2 * pi * hyp)**(0.5 - 0.5 * power)
    mu = (gain.T @ subband_mean)[:, 0]
    var = hyp / power + (gain.T**2 @ subband_var)[:, 0]
    resid = y - mu
    normpdf = const * (2 * pi * var)**-0.5 * np.exp(-0.5 * resid**2 / var)

    Z = np.sum(w * normpdf)
    Z_reg = Z + 1e-8  # keeps the log and the reciprocal finite when Z underflows
    Zinv = 1. / Z_reg
    lZ = np.log(Z_reg)

    # first derivatives of Z w.r.t. the subband and modulator means
    dZ1 = np.sum(w * gain * resid / var * normpdf, axis=-1)
    dZ2 = np.sum(w * centred * modulator_prec * normpdf, axis=-1)
    dlZ = Zinv * np.block([dZ1, dZ2])

    # second derivatives (diagonal only)
    d2Z1 = np.sum(w * gain**2 * ((resid / var)**2 - var**-1) * normpdf, axis=-1)
    d2Z2 = np.sum(w * ((centred * modulator_prec)**2 - modulator_prec) * normpdf, axis=-1)
    d2lZ = np.diag(-dlZ**2 + Zinv * np.block([d2Z1, d2Z2]))

    jitter = 1e-10 * np.eye(d2lZ.shape[0])
    id2lZ = inv(ensure_positive_precision(-d2lZ) - jitter)
    # approx. likelihood (site) mean and covariance (see Rasmussen & Williams p75)
    site_mean = cav_mean + id2lZ @ dlZ[..., None]
    site_cov = power * (-cav_cov + id2lZ)
    return lZ, site_mean, site_cov
def __init__(self, site_params=None, intmethod='GH', num_cub_pts=20):
    """
    Store the site parameters and select the cubature rule.

    :param site_params: site parameters, stored unchanged on the instance
    :param intmethod: integration method: 'GH' (Gauss-Hermite), 'UT3' (3rd order unscented
        transform), 'UT5' or 'UT' (5th order unscented transform)
    :param num_cub_pts: number of points passed to gauss_hermite; only used when intmethod is 'GH'
    :raises NotImplementedError: if intmethod is none of the values above
    """
    self.site_params = site_params

    if intmethod == 'GH':
        def cubature_func(dim):
            # Gauss-Hermite
            return gauss_hermite(dim, num_cub_pts)
    elif intmethod == 'UT3':
        def cubature_func(dim):
            # Unscented transform (3rd order)
            return symmetric_cubature_third_order(dim)
    elif intmethod in ('UT5', 'UT'):
        def cubature_func(dim):
            # Unscented transform (5th order)
            return symmetric_cubature_fifth_order(dim)
    else:
        raise NotImplementedError('integration method not recognised')

    self.cubature_func = cubature_func
def moment_match(self, y, cav_mean, cav_cov, hyp=None, power=1.0, cubature_func=None):
    """
    Moment matching for a two-component latent state with input-dependent noise.

    Component 0 enters as the mean of the observation and is integrated out in closed form;
    component 1 is passed through the link function to give the noise standard deviation and
    is integrated by one-dimensional cubature. Only cav_cov[0, 0] and cav_cov[1, 1] are read,
    so the resulting Hessian of logZₙ is diagonal.

    :param y: observed data (yₙ)
    :param cav_mean: cavity mean, indexed as [0, 0] and [1, 0]
    :param cav_cov: cavity covariance, indexed as [0, 0] and [1, 1]
    :param hyp: likelihood hyperparameter (not used by this method)
    :param power: EP power / fraction (a)
    :param cubature_func: function mapping a dimension to sigma points and weights
    :return:
        lZ: the log partition function, logZₙ
        site_mean: approximate likelihood (site) mean
        site_cov: approximate likelihood (site) covariance
    """
    x, w = gauss_hermite(1, 20) if cubature_func is None else cubature_func(1)

    m0, m1 = cav_mean[0, 0], cav_mean[1, 0]
    v0, v1 = cav_cov[0, 0], cav_cov[1, 1]

    # fsigᵢ=xᵢ√cₙ + mₙ: scale locations according to the cavity of component 1
    sigma_points = np.sqrt(v1) * x + cav_mean[1]
    noise_var = self.link_fn(sigma_points)**2.
    f2 = noise_var / power
    obs_var = f2 + v0
    resid = y - m0

    const = power**-0.5 * (2 * pi * noise_var)**(0.5 - 0.5 * power)
    normpdf = const * (2 * pi * obs_var)**-0.5 * np.exp(-0.5 * resid**2 / obs_var)

    Z = np.sum(w * normpdf)
    Z_floor = np.maximum(Z, 1e-8)  # guard the log and the reciprocal against underflow
    Zinv = 1. / Z_floor
    lZ = np.log(Z_floor)

    # first derivatives of logZ w.r.t. the two cavity means
    centred = sigma_points - m1
    dZ_integrand1 = resid / obs_var * normpdf
    dZ_integrand2 = centred / v1 * normpdf
    dlZ1 = Zinv * np.sum(w * dZ_integrand1)
    dlZ2 = Zinv * np.sum(w * dZ_integrand2)

    # second derivatives of logZ (diagonal terms only)
    d2Z_integrand1 = (-obs_var**-1 + (resid / obs_var)**2) * normpdf
    d2Z_integrand2 = (-v1**-1 + (centred / v1)**2) * normpdf
    d2lZ1 = -dlZ1**2 + Zinv * np.sum(w * d2Z_integrand1)
    d2lZ2 = -dlZ2**2 + Zinv * np.sum(w * d2Z_integrand2)

    dlZ = np.block([[dlZ1], [dlZ2]])
    d2lZ = np.block([[d2lZ1, 0], [0., d2lZ2]])

    jitter = 1e-10 * np.eye(d2lZ.shape[0])
    id2lZ = inv(ensure_positive_precision(-d2lZ) - jitter)
    # approx. likelihood (site) mean and covariance (see Rasmussen & Williams p75)
    site_mean = cav_mean + id2lZ @ dlZ
    site_cov = power * (-cav_cov + id2lZ)
    return lZ, site_mean, site_cov
def variational_expectation_cubature(self, y, post_mean, post_cov, hyp=None, cubature_func=None):
    """
    Computes the "variational expectation" via cubature, i.e. the expected log-likelihood,
    and its derivatives w.r.t. the posterior mean and variance
        E[log p(yₙ|fₙ)] = ∫ log p(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ

    :param y: observed data (yₙ)
    :param post_mean: posterior mean (mₙ), a column vector
    :param post_cov: posterior covariance (vₙ); the derivatives only use its diagonal
    :param hyp: likelihood hyperparameter, forwarded to evaluate_log_likelihood
    :param cubature_func: the function to compute sigma points and weights to use during cubature
    :return:
        exp_log_lik: the expected log likelihood, E[log p(yₙ|fₙ)]
        dE_dm: derivative of E[log p(yₙ|fₙ)] w.r.t. mₙ (column vector)
        dE_dv: derivative of E[log p(yₙ|fₙ)] w.r.t. vₙ (diagonal matrix)
    """
    dim = post_mean.shape[0]
    x, w = gauss_hermite(dim, 20) if cubature_func is None else cubature_func(dim)

    # fsigᵢ=xᵢ√(vₙ) + mₙ: scale locations according to the posterior
    sigma_points = cholesky(post_cov) @ np.atleast_2d(x) + post_mean
    # pre-compute wᵢ log p(yₙ|fsigᵢ)
    weighted_log_lik = w * self.evaluate_log_likelihood(y, sigma_points, hyp)

    # E[log p(yₙ|fₙ)] ≈ ∑ᵢ wᵢ log p(yₙ|fsigᵢ)
    exp_log_lik = np.sum(weighted_log_lik)

    invv = np.diag(post_cov)[:, None]**-1
    centred = sigma_points - post_mean

    # dE/dmₙ ≈ ∑ᵢ wᵢ (fsigᵢ-mₙ) vₙ⁻¹ log p(yₙ|fsigᵢ)
    dE_dm = np.sum(invv * centred * weighted_log_lik, axis=-1)[:, None]

    # dE/dvₙ ≈ ∑ᵢ wᵢ [(fsigᵢ-mₙ)² vₙ⁻² - vₙ⁻¹]/2 log p(yₙ|fsigᵢ)
    # (derivative w.r.t. the variance = 0.5 * second derivative w.r.t. the mean)
    dE_dv = np.diag(
        np.sum((0.5 * (invv**2 * centred**2) - 0.5 * invv) * weighted_log_lik, axis=-1)
    )
    return exp_log_lik, dE_dm, dE_dv
def statistical_linear_regression(self, cav_mean, cav_cov, hyp=None, cubature_func=None):
    """
    Statistical linear regression for a latent state whose first half holds the subband
    components and whose second half holds the modulator components.

    NOTE: the original author's remark reads "This gives the same result as above - delete",
    i.e. this routine was believed to duplicate another SLR implementation.

    :param cav_mean: cavity mean, subbands stacked on top of modulators
    :param cav_cov: cavity covariance, partitioned the same way as cav_mean
    :param hyp: used directly as the conditional covariance Cov[yₙ|fₙ]
    :param cubature_func: function mapping a dimension to sigma points and weights
    :return:
        mu: cubature estimate of E[yₙ]
        S: cubature estimate of the variance of yₙ
        C: cross covariance term between the latent state and yₙ
        omega: derivative of mu w.r.t. the cavity mean
    """
    num_components = int(cav_mean.shape[0] / 2)
    if cubature_func is None:
        x, w = gauss_hermite(num_components, 20)  # Gauss-Hermite sigma points and weights
    else:
        x, w = cubature_func(num_components)

    subband_mean = cav_mean[:num_components]
    modulator_mean = self.link_fn(cav_mean[num_components:])
    subband_var = np.diag(cav_cov[:num_components, :num_components])[..., None]
    modulator_cov = cav_cov[num_components:, num_components:]

    sigma_points = cholesky(modulator_cov) @ x + modulator_mean
    gain = self.link_fn(sigma_points)
    lik_expectation = (gain.T @ subband_mean).T
    lik_covariance = hyp

    # muₙ ≈ ∑ᵢ wᵢ E[yₙ|fsigᵢ]
    mu = np.sum(w * lik_expectation, axis=-1)[:, None]
    resid = lik_expectation - mu

    # S ≈ ∑ᵢ wᵢ [(E[yₙ|fsigᵢ]-muₙ) (E[yₙ|fsigᵢ]-muₙ)' + Cov[yₙ|fₙ]]
    S = np.sum(w * (resid * resid + lik_covariance), axis=-1)[:, None]

    # C ≈ ∑ᵢ wᵢ (fsigᵢ -mₙ) (E[yₙ|fsigᵢ]-muₙ)'
    centred = sigma_points - modulator_mean
    latent_dev = np.block([[gain * subband_var], [centred]])
    C = np.sum(w * latent_dev * resid, axis=-1)[:, None]

    # derivative of mu w.r.t. the subband and modulator means
    modulator_prec = np.diag(modulator_cov)[..., None]**-1
    dmu = np.block([[gain], [modulator_prec * centred * lik_expectation]])
    omega = np.sum(w * dmu, axis=-1)[None, :]
    return mu, S, C, omega
def rollout_utility_archive(x, bounds, func_policy, depth_h, _queries, _values, N_q, n_sample=None,
                            decay_rate=0.9, ARD_Flag=False, length_scale=None):
    """
    Estimate the rollout utility of querying x with a look-ahead of depth_h steps.

    The utility is the expected improvement (EI) at x plus, for depth_h > 0, the discounted,
    quadrature-weighted utilities of the points the policy would pick next after each
    fantasised observation at x:

        u(x, h) = EI(x) + decay_rate * sum_i weights[i] * u(x_next_i, h - 1)

    Compared with the previous revision:
      * the running total is kept in a local variable instead of the module-level ``U``.
        The global leaked the parent's partial sum into every recursive call, so nested
        levels were double-counted; the global is no longer read or written.
      * ``decay_rate`` is forwarded by keyword. It used to be passed positionally into the
        ``n_sample`` slot, so every nested level silently fell back to the default 0.9.

    :param x: candidate query; a 1-D array is promoted to a single-row 2-D array
    :param bounds: search-space bounds; len(bounds) is used as the kernel input dimension
    :param func_policy: callable (gp_model, depth_h, bounds) -> next query point
    :param depth_h: remaining look-ahead depth; 0 returns the plain EI
    :param _queries: query locations observed so far (rows)
    :param _values: observed values matching _queries (rows)
    :param N_q: number of quadrature points used per look-ahead step
    :param n_sample: unused here; forwarded unchanged to the recursive calls
    :param decay_rate: discount applied to each further look-ahead level
    :param ARD_Flag: forwarded to GPy.kern.RBF as ARD
    :param length_scale: forwarded to GPy.kern.RBF as lengthscale
    :return: the estimated rollout utility of x
    """
    if len(x.shape) == 1:
        x = np.array([x])

    kernel = GPy.kern.RBF(len(bounds), ARD=ARD_Flag, lengthscale=length_scale)
    gp_model = fit(_queries, _values, kernel)  # todo: memo

    utility = ei(x, bounds, gp_model)
    if depth_h == 0:
        return utility

    # Fantasise an observation at x for every quadrature point and recurse on the
    # point the policy would choose next. New arrays are built per branch, so the
    # caller's _queries/_values are never modified.
    queries_next = np.concatenate([_queries, x])
    points, weights = gauss_hermite(x, gp_model, N_q)
    for i in range(N_q):
        fantasy_value = np.array([[points[0][i]]])
        values_next = np.concatenate([_values, fantasy_value])
        kernel = GPy.kern.RBF(len(bounds), ARD=ARD_Flag, lengthscale=length_scale)
        fantasy_model = fit(queries_next, values_next, kernel)  # todo: memo
        x_next = func_policy(fantasy_model, depth_h, bounds)
        utility = utility + weights[i] * decay_rate * rollout_utility_archive(
            x_next, bounds, func_policy, depth_h - 1, queries_next, values_next, N_q,
            n_sample=n_sample, decay_rate=decay_rate,
            ARD_Flag=ARD_Flag, length_scale=length_scale)
    return utility
def variational_expectation(self, y, post_mean, post_cov, hyp=None, cubature_func=None):
    """
    Expected log-likelihood and its derivatives for a latent state whose first half holds
    the subband components and whose second half holds the modulator components.

    The subbands are integrated out in closed form (using the diagonal of their covariance),
    the modulators via cubature. Cross-covariances between the two halves are ignored.

    The modulator derivatives use the Gaussian identities
        dE/dm = E[(f-m) v⁻¹ g(f)],   dE/dv = E[((f-m)² v⁻² - v⁻¹)/2 g(f)]
    in which v must be the variance of the variable the sigma points are drawn from, i.e.
    the modulator variance. The previous revision used the subband variance there.

    :param y: observed data (yₙ)
    :param post_mean: posterior mean, subbands stacked on top of modulators
    :param post_cov: posterior covariance, partitioned the same way as post_mean
    :param hyp: enters as the observation noise variance; must not be None
    :param cubature_func: function mapping a dimension to sigma points and weights
    :return:
        exp_log_lik: the expected log likelihood
        dE_dm: derivative w.r.t. the posterior mean (column vector)
        dE_dv: derivative w.r.t. the posterior variances (diagonal matrix)
    """
    num_components = int(post_mean.shape[0] / 2)
    if cubature_func is None:
        x, w = gauss_hermite(num_components, 20)  # Gauss-Hermite sigma points and weights
    else:
        x, w = cubature_func(num_components)

    subband_mean = post_mean[:num_components]
    modulator_mean = self.link_fn(post_mean[num_components:])
    subband_cov = post_cov[:num_components, :num_components]
    modulator_cov = post_cov[num_components:, num_components:]
    subband_var = np.diag(subband_cov)[..., None]
    modulator_prec = np.diag(modulator_cov)[..., None]**-1

    sigma_points = cholesky(modulator_cov) @ x + modulator_mean
    gain = self.link_fn(sigma_points)  # link function evaluated once at every sigma point

    # log-density at the subband mean, plus the correction for the subband variance:
    # E_s[(y - aᵀs)²] = (y - aᵀm_s)² + ∑ₖ aₖ² v_s,k
    mu = (gain.T @ subband_mean)[:, 0]
    lognormpdf = -0.5 * np.log(2 * pi * hyp) - 0.5 * (y - mu)**2 / hyp
    const = -0.5 / hyp * (gain.T**2 @ subband_var)[:, 0]
    integrand = lognormpdf + const

    exp_log_lik = np.sum(w * integrand)

    # derivatives w.r.t. the means: subbands analytically, modulators via the identity above
    scaled_dev = (sigma_points - modulator_mean) * modulator_prec
    dE1 = np.sum(w * gain * (y - mu) / hyp, axis=-1)
    dE2 = np.sum(w * scaled_dev * integrand, axis=-1)
    dE_dm = np.block([dE1, dE2])[..., None]

    # derivatives w.r.t. the variances
    d2E1 = np.sum(w * -0.5 * gain**2 / hyp, axis=-1)
    d2E2 = np.sum(w * 0.5 * (scaled_dev**2 - modulator_prec) * integrand, axis=-1)
    dE_dv = np.diag(np.block([d2E1, d2E2]))
    return exp_log_lik, dE_dm, dE_dv
def statistical_linear_regression_cubature(self, cav_mean, cav_cov, hyp=None, cubature_func=None):
    """
    Perform statistical linear regression (SLR) using cubature.
    We aim to find a likelihood approximation p(yₙ|fₙ) ≈ 𝓝(yₙ|Afₙ+b,Ω+Var[yₙ|fₙ]).

    TODO: this currently assumes an additive noise model (ok for our current applications),
    make more general

    :param cav_mean: cavity mean (mₙ), a column vector
    :param cav_cov: cavity covariance (vₙ)
    :param hyp: likelihood hyperparameter, forwarded to conditional_moments
    :param cubature_func: function mapping a dimension to sigma points and weights
    :return:
        mu: cubature estimate of E[yₙ]
        S: cubature estimate of the variance of yₙ
        C: cross covariance between fₙ and yₙ
        omega: derivative of mu w.r.t. the cavity mean
    """
    dim = cav_mean.shape[0]
    x, w = gauss_hermite(dim, 20) if cubature_func is None else cubature_func(dim)

    # fsigᵢ=xᵢ√(vₙ) + mₙ: scale locations according to cavity dist.
    sigma_points = cholesky(cav_cov) @ np.atleast_2d(x) + cav_mean
    lik_expectation, lik_covariance = self.conditional_moments(sigma_points, hyp)

    # zₙ = ∫ E[yₙ|fₙ] 𝓝(fₙ|mₙ,vₙ) dfₙ ≈ ∑ᵢ wᵢ E[yₙ|fsigᵢ]
    mu = np.sum(w * lik_expectation, axis=-1)[:, None]
    resid = lik_expectation - mu
    centred = sigma_points - cav_mean

    # S ≈ ∑ᵢ wᵢ [(E[yₙ|fsigᵢ]-zₙ) (E[yₙ|fsigᵢ]-zₙ)' + Cov[yₙ|fₙ]]
    # TODO: allow for multi-dim cubature
    S = np.sum(w * (resid * resid + lik_covariance), axis=-1)[:, None]

    # C ≈ ∑ᵢ wᵢ (fsigᵢ -mₙ) (E[yₙ|fsigᵢ]-zₙ)'
    C = np.sum(w * centred * resid, axis=-1)[:, None]

    # omega ≈ ∑ᵢ wᵢ E[yₙ|fsigᵢ] vₙ⁻¹ (fsigᵢ-mₙ)
    omega = np.sum(w * lik_expectation * (inv(cav_cov) @ centred), axis=-1)[None, :]
    return mu, S, C, omega
def statistical_linear_regression(self, cav_mean, cav_cov, hyp=None, cubature_func=None):
    """
    Perform statistical linear regression (SLR) using cubature, for a two-component latent
    state in which component 0 is the mean of yₙ and the squared link function of
    component 1 is its noise variance.
    We aim to find a likelihood approximation p(yₙ|fₙ) ≈ 𝓝(yₙ|Afₙ+b,Ω+Var[yₙ|fₙ]).

    Since E[yₙ|fₙ] is component 0 itself, mu and omega are available in closed form;
    only S and C are computed by cubature.

    :param cav_mean: cavity mean (mₙ), read at [0, 0]
    :param cav_cov: cavity covariance (vₙ), read at [0, 0]
    :param hyp: likelihood hyperparameter (not used by this method)
    :param cubature_func: function mapping a dimension to sigma points and weights
    :return:
        mu: E[yₙ], shape (1, 1)
        S: variance of yₙ, shape (1, 1)
        C: cross covariance between fₙ and yₙ, shape (2, 1)
        omega: derivative of mu w.r.t. the cavity mean, the constant [[1, 0]]
    """
    dim = cav_mean.shape[0]
    x, w = gauss_hermite(dim, 20) if cubature_func is None else cubature_func(dim)

    m0 = cav_mean[0, 0]
    v0 = cav_cov[0, 0]

    # fsigᵢ=xᵢ√(vₙ) + mₙ: scale locations according to cavity dist.
    sigma_points = cholesky(cav_cov) @ x + cav_mean
    noise_var = self.link_fn(sigma_points[1])**2

    # zₙ = E[E[yₙ|fₙ]] is simply the mean of component 0
    mu = m0.reshape(1, 1)

    # S = Var[component 0] + ∑ᵢ wᵢ Cov[yₙ|fsigᵢ]
    S = (v0 + np.sum(w * noise_var)).reshape(1, 1)

    # C ≈ ∑ᵢ wᵢ (fsigᵢ -mₙ) (E[yₙ|fsigᵢ]-zₙ)'
    centred = sigma_points - cav_mean
    C = np.sum(w * centred * (sigma_points[0] - m0), axis=-1).reshape(2, 1)

    # dzₙ/dmₙ: the mean depends on component 0 only
    omega = np.block([[1., 0.]])
    return mu, S, C, omega
def variational_expectation(self, y, m, v, hyp=None, cubature_func=None):
    """
    Expected log-likelihood and its derivatives for a two-component latent state in which
    component 0 is the mean of yₙ and the squared link function of component 1 is its
    noise variance:

        E[log p(yₙ|fₙ)] = -½ log 2π - ½ ∑ᵢ wᵢ [log σᵢ² + ((yₙ-m₀)² + v₀) / σᵢ²]

    with σᵢ² = link_fn(fsigᵢ)². Component 0 is integrated analytically, component 1 by
    one-dimensional cubature; the covariance between the two is ignored.

    The derivative w.r.t. m₀ is ∑ᵢ wᵢ (yₙ-m₀) / σᵢ². The previous revision used
    (yₙ - m₀ + v₀) in the numerator, which is not the derivative of the expression above.

    :param y: observed data (yₙ)
    :param m: posterior mean, read at [0, 0] and [1, 0]
    :param v: posterior covariance, read at [0, 0] and [1, 1]
    :param hyp: likelihood hyperparameter (not used by this method)
    :param cubature_func: function mapping a dimension to sigma points and weights
    :return:
        exp_log_lik: the expected log likelihood
        dE_dm: derivative w.r.t. the posterior mean, shape (2, 1)
        dE_dv: derivative w.r.t. the posterior variances, diagonal, shape (2, 2)
    """
    if cubature_func is None:
        x, w = gauss_hermite(1, 20)  # Gauss-Hermite sigma points and weights
    else:
        x, w = cubature_func(1)

    m0, m1, v0, v1 = m[0, 0], m[1, 0], v[0, 0], v[1, 1]

    # fsigᵢ=xᵢ√(vₙ) + mₙ: scale locations according to the posterior of component 1
    sigma_points = np.sqrt(v1) * x + m1
    var = self.link_fn(sigma_points)**2
    resid = y - m0
    centred = sigma_points - m1

    # pre-compute wᵢ [log σᵢ² + ((yₙ-m₀)² + v₀) / σᵢ²]
    log_lik = np.log(var) + var**-1 * (resid**2 + v0)
    weighted_log_likelihood_eval = w * log_lik

    exp_log_lik = -0.5 * np.log(2 * pi) - 0.5 * np.sum(weighted_log_likelihood_eval)

    # derivatives w.r.t. the means: component 0 analytically, component 1 via
    # dE/dmₙ = ∫ (fₙ-mₙ) vₙ⁻¹ log p(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
    dE_dm1 = np.sum(var**-1 * resid * w)
    dE_dm2 = -0.5 * np.sum(weighted_log_likelihood_eval * v1**-1 * centred)

    # derivatives w.r.t. the variances: component 0 analytically, component 1 via
    # dE/dvₙ = ∫ [(fₙ-mₙ)² vₙ⁻² - vₙ⁻¹]/2 log p(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
    dE_dv1 = -0.5 * np.sum(var**-1 * w)
    dE_dv2 = -0.25 * np.sum((v1**-2 * centred**2 - v1**-1) * weighted_log_likelihood_eval)

    dE_dm = np.block([[dE_dm1], [dE_dm2]])
    dE_dv = np.block([[dE_dv1, 0], [0., dE_dv2]])
    return exp_log_lik, dE_dm, dE_dv
def moment_match_cubature(self, y, cav_mean, cav_cov, hyp=None, power=1.0, cubature_func=None):
    """
    TODO: N.B. THIS VERSION IS SUPERSEDED BY THE MULTI-DIMENSIONAL IMPLEMENTATION OF THE SAME
          NAME. HOWEVER THIS ONE MAY BE MORE STABLE.

    Perform moment matching via cubature.
    Moment matching involves computing the log partition function, logZₙ, and its derivatives
    w.r.t. the cavity mean
        logZₙ = log ∫ pᵃ(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
    with EP power a. The derivatives are then converted into the site mean and covariance.

    :param y: observed data (yₙ) [scalar]
    :param cav_mean: cavity mean (mₙ) [scalar]
    :param cav_cov: cavity covariance (cₙ) [scalar]
    :param hyp: likelihood hyperparameter [scalar]
    :param power: EP power / fraction (a) [scalar]
    :param cubature_func: the function to compute sigma points and weights to use during cubature
    :return:
        lZ: the log partition function, logZₙ
        site_mean: approximate likelihood (site) mean
        site_cov: approximate likelihood (site) variance
    """
    dim = cav_mean.shape[0]
    x, w = gauss_hermite(dim, 20) if cubature_func is None else cubature_func(dim)

    cav_cho, low = cho_factor(cav_cov)
    # fsigᵢ=xᵢ√cₙ + mₙ: scale locations according to cavity dist.
    sigma_points = cav_cho @ np.atleast_2d(x) + cav_mean
    centred = sigma_points - cav_mean

    # pre-compute wᵢ pᵃ(yₙ|fsigᵢ)
    # NOTE: working from the log-likelihood instead (subtract its maximum before
    # exponentiating) was noted as a possibly more stable alternative.
    weighted_lik = w * self.evaluate_likelihood(y, sigma_points, hyp)**power

    # Zₙ = ∫ pᵃ(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ ≈ ∑ᵢ wᵢ pᵃ(yₙ|fsigᵢ)
    Z = np.sum(weighted_lik, axis=-1)
    lZ = np.log(Z)
    Zinv = 1.0 / Z

    # dZₙ/dmₙ ≈ ∑ᵢ wᵢ (fsigᵢ-mₙ) vₙ⁻¹ pᵃ(yₙ|fsigᵢ), then dlogZₙ/dmₙ = (dZₙ/dmₙ) / Zₙ
    covinv_f_m = cho_solve((cav_cho, low), centred)
    dZ = np.sum(covinv_f_m * weighted_lik, axis=-1)
    dlZ = Zinv * dZ

    # d²Zₙ/dmₙ² ≈ ∑ᵢ wᵢ [(fsigᵢ-mₙ)² vₙ⁻² - vₙ⁻¹] pᵃ(yₙ|fsigᵢ)
    d2Z = np.sum((centred**2 / cav_cov**2 - 1.0 / cav_cov) * weighted_lik)
    # d²logZₙ/dmₙ² = d²Zₙ/dmₙ² / Zₙ - (dlogZₙ/dmₙ)²
    d2lZ = -dlZ @ dlZ.T + Zinv * d2Z

    jitter = 1e-10 * np.eye(d2lZ.shape[0])
    id2lZ = inv(ensure_positive_precision(-d2lZ) - jitter)
    # approx. likelihood (site) mean and variance (see Rasmussen & Williams p75)
    site_mean = cav_mean + id2lZ @ dlZ
    site_cov = power * (-cav_cov + id2lZ)
    return lZ, site_mean, site_cov
def moment_match_cubature(self, y, cav_mean, cav_cov, hyp=None, power=1.0, cubature_func=None):
    """
    TODO: N.B. THIS VERSION ALLOWS MULTI-DIMENSIONAL MOMENT MATCHING, BUT CAN BE UNSTABLE

    Perform moment matching via cubature.
    Moment matching involves computing the log partition function, logZₙ, and its derivatives
    w.r.t. the cavity mean
        logZₙ = log ∫ pᵃ(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
    with EP power a. The derivatives are then converted into the site mean and covariance.

    The sigma points are scaled with the LOWER Cholesky factor L of the cavity covariance
    (cₙ = L Lᵀ), so that their weighted covariance equals cₙ. The previous revision used the
    default (upper) factor returned by cho_factor, which only gives the right covariance
    when cₙ is diagonal; results for diagonal or scalar cavities are unchanged.

    :param y: observed data (yₙ)
    :param cav_mean: cavity mean (mₙ), a column vector
    :param cav_cov: cavity covariance (cₙ)
    :param hyp: likelihood hyperparameter, forwarded to evaluate_likelihood
    :param power: EP power / fraction (a)
    :param cubature_func: the function to compute sigma points and weights to use during cubature
    :return:
        lZ: the log partition function, logZₙ (Zₙ is floored at 1e-8 before the log)
        site_mean: approximate likelihood (site) mean
        site_cov: approximate likelihood (site) covariance
    """
    dim = cav_mean.shape[0]
    if cubature_func is None:
        x, w = gauss_hermite(dim, 20)  # Gauss-Hermite sigma points and weights
    else:
        x, w = cubature_func(dim)

    cav_cho, _ = cho_factor(cav_cov, lower=True)
    # cho_factor may leave arbitrary values in the triangle it does not use; mask them
    # before treating the factor as a plain matrix.
    cav_cho = np.tril(cav_cho)
    # fsigᵢ = L xᵢ + mₙ: scale locations according to cavity dist.
    sigma_points = cav_cho @ np.atleast_2d(x) + cav_mean

    # pre-compute wᵢ pᵃ(yₙ|fsigᵢ)
    weighted_likelihood_eval = w * self.evaluate_likelihood(y, sigma_points, hyp)**power

    # Zₙ = ∫ pᵃ(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ ≈ ∑ᵢ wᵢ pᵃ(yₙ|fsigᵢ)
    Z = np.sum(weighted_likelihood_eval, axis=-1)
    Z_floor = np.maximum(Z, 1e-8)  # guard the log and the reciprocal against underflow
    lZ = np.log(Z_floor)
    Zinv = 1.0 / Z_floor

    # dZₙ/dmₙ = ∫ (fₙ-mₙ) vₙ⁻¹ pᵃ(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ, one term per sigma point
    per_point_axes = (1, None, None, 1)
    d1 = vmap(gaussian_first_derivative_wrt_mean, per_point_axes)(
        sigma_points[..., None], cav_mean, cav_cov, weighted_likelihood_eval)
    dZ = np.sum(d1, axis=0)
    # dlogZₙ/dmₙ = (dZₙ/dmₙ) / Zₙ
    dlZ = Zinv * dZ

    # d²Zₙ/dmₙ² = ∫ [(fₙ-mₙ)² vₙ⁻² - vₙ⁻¹] pᵃ(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ, one term per sigma point
    d2 = vmap(gaussian_second_derivative_wrt_mean, per_point_axes)(
        sigma_points[..., None], cav_mean, cav_cov, weighted_likelihood_eval)
    d2Z = np.sum(d2, axis=0)
    # d²logZₙ/dmₙ² = d²Zₙ/dmₙ² / Zₙ - (dlogZₙ/dmₙ)²
    d2lZ = -dlZ @ dlZ.T + Zinv * d2Z

    jitter = 1e-10 * np.eye(d2lZ.shape[0])
    id2lZ = inv(ensure_positive_precision(-d2lZ) - jitter)
    # approx. likelihood (site) mean and covariance (see Rasmussen & Williams p75)
    site_mean = cav_mean + id2lZ @ dlZ
    site_cov = power * (-cav_cov + id2lZ)
    return lZ, site_mean, site_cov