def __init__(self, model, threshold=None, prior=None, n_inits=10, max_opt_iters=1000, seed=0):
    super(BolfiPosterior, self).__init__()
    self.threshold = threshold
    self.model = model
    self.random_state = np.random.RandomState(seed)
    self.n_inits = n_inits
    self.max_opt_iters = max_opt_iters
    self.prior = prior
    self.dim = self.model.input_dim

    if self.threshold is None:
        # TODO: the evidence could be used for a good guess for starting locations
        minloc, minval = minimize(self.model.predict_mean,
                                  self.model.bounds,
                                  self.model.predictive_gradient_mean,
                                  self.prior,
                                  self.n_inits,
                                  self.max_opt_iters,
                                  random_state=self.random_state)
        self.threshold = minval
        logger.info("Using optimized minimum value (%.4f) of the GP discrepancy mean "
                    "function as a threshold" % (self.threshold))
def acquire(self, n, t=None):
    """Return the next batch of acquisition points.

    Gaussian noise ~N(0, self.noise_var) is added to the acquired points.

    Parameters
    ----------
    n : int
        Number of acquisition points to return.
    t : int, optional
        Current acq_batch_index (starting from 0).

    Returns
    -------
    x : np.ndarray
        The shape is (n, input_dim).
    """
    logger.debug('Acquiring the next batch of {} values'.format(n))

    # Optimize the current minimum
    obj = lambda x: self.evaluate(x, t)
    grad_obj = lambda x: self.evaluate_gradient(x, t)
    xhat, _ = minimize(obj, self.model.bounds, grad_obj, self.prior, self.n_inits,
                       self.max_opt_iters, random_state=self.random_state)

    # Create n copies of the minimum
    x = np.tile(xhat, (n, 1))

    # Add noise for more efficient fitting of GP
    x = self._add_noise(x)

    return x
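The helper `_add_noise` is defined elsewhere in the class and is not shown here. Below is a minimal sketch of what such a helper might do, assuming Gaussian jitter with variance `noise_var` clipped back into the model bounds; the standalone signature and clipping behaviour are assumptions for illustration, not the library's implementation (which may, for instance, sample from a truncated normal instead).

import numpy as np

def add_noise(x, bounds, noise_var, random_state):
    # Hypothetical standalone version of _add_noise: jitter each acquired
    # point with Gaussian noise, then clip the result back into the box.
    noise = random_state.normal(scale=np.sqrt(noise_var), size=x.shape)
    lows = np.array([low for low, high in bounds])
    highs = np.array([high for low, high in bounds])
    return np.clip(x + noise, lows, highs)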
def test_minimize_with_known_gradient():
    fun = lambda x: x[0]**2 + (x[1] - 1)**4
    grad = lambda x: np.array([2 * x[0], 4 * (x[1] - 1)**3])
    bounds = ((-2, 2), (-2, 3))
    loc, val = minimize(fun, bounds, grad)
    assert np.isclose(val, 0, atol=0.01)
    assert np.allclose(loc, np.array([0, 1]), atol=0.02)
def test_minimize_with_approx_gradient():
    def fun(x):
        return x[0]**2 + (x[1] - 1)**4

    bounds = ((-2, 2), (-2, 3))
    loc, val = minimize(fun, bounds)
    assert np.isclose(val, 0, atol=0.01)
    assert np.allclose(loc, np.array([0, 1]), atol=0.02)
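All of these snippets call the same multi-start `minimize` helper rather than `scipy.optimize.minimize` directly. Judging from the call sites (positional `fun, bounds, grad, prior, n_inits, max_opt_iters` and keyword `random_state`, `n_start_points`, `maxiter`, `method`, `constraints`), a minimal sketch of such a helper could look as follows; the body is illustrative, assuming SciPy as the local optimizer and a prior exposing a SciPy-like `rvs` method, and is not the library's actual implementation.

import numpy as np
import scipy.optimize

def minimize(fun, bounds, grad=None, prior=None, n_start_points=10,
             maxiter=1000, random_state=None, constraints=None,
             method='L-BFGS-B'):
    # Multi-start local optimization: draw start points from the prior
    # (or uniformly within the bounds), run a local optimizer from each,
    # and return the best minimum found as (location, value).
    random_state = random_state or np.random
    ndim = len(bounds)
    lows = np.array([low for low, high in bounds])
    highs = np.array([high for low, high in bounds])

    if prior is None:
        starts = random_state.uniform(lows, highs, size=(n_start_points, ndim))
    else:
        # Assumption: the prior provides SciPy-like rvs() sampling.
        starts = np.asarray(prior.rvs(n_start_points, random_state=random_state))
        starts = starts.reshape(n_start_points, ndim)

    locs, vals = [], []
    for x0 in starts:
        result = scipy.optimize.minimize(fun, x0, jac=grad, bounds=bounds,
                                         method=method,
                                         constraints=constraints or (),
                                         options={'maxiter': maxiter})
        locs.append(result.x)
        vals.append(result.fun)

    best = int(np.argmin(vals))
    return locs[best], vals[best]

The positional order matches calls like `minimize(fun, bounds, grad, prior, n_inits, max_opt_iters, random_state=...)` seen throughout this page.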
def estimate_M(self, t):
    """Estimate the maximum value of the function."""
    logger.info("Estimating M..")
    obj = lambda x: self.a.evaluate(x, t=t)  # minimization
    loc, val = minimize(obj, self.model.bounds,
                        random_state=self.a.random_state,
                        maxiter=500, n_start_points=5)
    return -1.0 * float(val)  # maximization
def test_minimize_with_constraints():
    def fun(x):
        return x[0]**2 + (x[1] - 1)**4

    bounds = ((-2, 2), (-2, 3))
    # Test constraint: y >= x (a single-element tuple of SciPy constraint dicts)
    constraints = ({'type': 'ineq', 'fun': lambda x: x[1] - x[0]},)
    loc, val = minimize(fun, bounds, constraints=constraints, method='SLSQP')
    assert np.isclose(val, 0, atol=0.01)
    assert np.allclose(loc, np.array([0, 1]), atol=0.02)
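The constraint dictionary follows SciPy's convention: for `'ineq'`, the callable must be non-negative at feasible points, so y >= x is encoded as `x[1] - x[0] >= 0`. The same constraint can be checked against `scipy.optimize.minimize` directly (illustrative only):

import numpy as np
import scipy.optimize

def fun(x):
    return x[0]**2 + (x[1] - 1)**4

# SciPy 'ineq' constraints require fun(x) >= 0, so y >= x becomes x[1] - x[0] >= 0.
constraints = ({'type': 'ineq', 'fun': lambda x: x[1] - x[0]},)
result = scipy.optimize.minimize(fun, x0=np.array([1.0, -1.0]),
                                 bounds=((-2, 2), (-2, 3)),
                                 method='SLSQP', constraints=constraints)
print(result.x)  # close to [0, 1], which satisfies x[1] >= x[0]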
def test_minimize_with_known_gradient():
    def fun(x):
        return x[0]**2 + (x[1] - 1)**4

    def grad(x):
        return np.array([2 * x[0], 4 * (x[1] - 1)**3])

    bounds = ((-2, 2), (-2, 3))
    loc, val = minimize(fun, bounds, grad)
    assert np.isclose(val, 0, atol=0.01)
    assert np.allclose(loc, np.array([0, 1]), atol=0.02)
def _compute_map_estimates(self):
    """Return the maximum a posteriori estimate for each parameter."""
    minimum_location, _ = minimize(
        fun=self._negative_pdf,
        bounds=self._model.bounds,
        grad=self._negative_gradient_pdf,
        prior=self._prior,
        n_start_points=self._n_opt_inits,
        maxiter=self._max_opt_iters
    )
    return minimum_location
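Minimizing a negative (log-)density is the standard way to obtain a MAP point with a minimizer. Below is a self-contained toy version of the same idea using `scipy.stats`; the densities and numbers are made up for illustration and are unrelated to the source model.

import numpy as np
import scipy.optimize
import scipy.stats

# Toy posterior: Gaussian likelihood N(1, 0.5^2) times Gaussian prior N(0, 1).
def negative_log_posterior(theta):
    log_lik = scipy.stats.norm.logpdf(theta[0], loc=1.0, scale=0.5)
    log_prior = scipy.stats.norm.logpdf(theta[0], loc=0.0, scale=1.0)
    return -(log_lik + log_prior)

result = scipy.optimize.minimize(negative_log_posterior, x0=np.zeros(1),
                                 bounds=[(-3, 3)], method='L-BFGS-B')
# MAP is the precision-weighted mean: (4 * 1 + 1 * 0) / (4 + 1) = 0.8.
print(result.x)  # approximately [0.8]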
def estimate_L(self, t):
    """Return the maximum absolute value of the acquisition surface gradient for each dimension."""
    L = list()
    for i in range(len(self.model.bounds)):
        logger.info("Estimating L {}..".format(i))
        # Maximize the absolute gradient by minimizing its negation
        # (binding i explicitly avoids the late-binding closure pitfall).
        grad_obj = lambda x, i=i: -np.abs(
            float(self.a.evaluate_gradient(x, t=t)[0][i]))
        loc, val = minimize(grad_obj, self.model.bounds,
                            random_state=self.a.random_state,
                            maxiter=500, n_start_points=5)  # expensive to evaluate
        L.append(abs(val))
    return L
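The negate-and-minimize trick above yields a per-dimension bound on the absolute gradient, i.e. a directional Lipschitz constant. Here is a toy check of the same pattern on the test function f(x) = x0**2 + (x1 - 1)**4 used earlier, calling scipy.optimize directly (illustrative only):

import numpy as np
import scipy.optimize

bounds = ((-2, 2), (-2, 3))

def grad(x):
    # Gradient of f(x) = x0**2 + (x1 - 1)**4.
    return np.array([2 * x[0], 4 * (x[1] - 1)**3])

rng = np.random.RandomState(0)
L = []
for i in range(len(bounds)):
    # Maximize |df/dx_i| over the box by minimizing its negation;
    # a few restarts guard against local optima of the non-smooth objective.
    obj = lambda x, i=i: -abs(grad(x)[i])
    starts = rng.uniform([low for low, _ in bounds],
                         [high for _, high in bounds], size=(5, 2))
    best = min((scipy.optimize.minimize(obj, x0, bounds=bounds) for x0 in starts),
               key=lambda res: res.fun)
    L.append(abs(best.fun))

print(L)  # typically [4.0, 108.0]: |2*x0| <= 4 on [-2, 2], |4*(x1-1)**3| <= 108 on [-2, 3]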
def __init__(self, model, threshold=None, prior=None, n_inits=10, max_opt_iters=1000, seed=0):
    """Initialize a BOLFI posterior.

    Parameters
    ----------
    model : elfi.bo.gpy_regression.GPyRegression
        Instance of the surrogate model
    threshold : float, optional
        The threshold value used in the calculation of the posterior, see the BOLFI paper
        for details. By default, the minimum value of discrepancy estimate mean is used.
    prior : ScipyLikeDistribution, optional
        By default uniform distribution within model bounds.
    n_inits : int, optional
        Number of initialization points in internal optimization.
    max_opt_iters : int, optional
        Maximum number of iterations performed in internal optimization.
    seed : int, optional

    """
    super(BolfiPosterior, self).__init__()
    self.threshold = threshold
    self.model = model
    self.random_state = np.random.RandomState(seed)
    self.n_inits = n_inits
    self.max_opt_iters = max_opt_iters
    self.prior = prior
    self.dim = self.model.input_dim

    if type(self.model).__name__ == 'DGPRegression':
        predictive_gradient_mean = None
    elif type(self.model).__name__ == 'GPyRegression':
        predictive_gradient_mean = self.model.predictive_gradient_mean
    else:
        # Fall back to numerical gradients for other model types
        # (the original left the name unbound here, raising a NameError).
        predictive_gradient_mean = None

    if self.threshold is None:
        # TODO: the evidence could be used for a good guess for starting locations
        minloc, minval = minimize(
            self.model.predict_mean,
            self.model.bounds,
            predictive_gradient_mean,
            self.prior,
            self.n_inits,
            self.max_opt_iters,
            random_state=self.random_state)
        self.threshold = minval
        logger.info("Using optimized minimum value (%.4f) of the GP discrepancy mean "
                    "function as a threshold" % (self.threshold))
def acquire(self, n, t=None):
    """Return the next batch of acquisition points.

    Gaussian noise ~N(0, self.noise_var) is added to the acquired points.

    Parameters
    ----------
    n : int
        Number of acquisition points to return.
    t : int, optional
        Current acq_batch_index (starting from 0).

    Returns
    -------
    x : np.ndarray
        The shape is (n, input_dim)

    """
    logger.debug('Acquiring the next batch of %d values', n)

    # Optimize the current minimum
    def obj(x):
        return self.evaluate(x, t)

    def grad_obj(x):
        return self.evaluate_gradient(x, t)

    xhat, _ = minimize(
        obj,
        self.model.bounds,
        method='L-BFGS-B' if self.constraints is None else 'SLSQP',
        constraints=self.constraints,
        grad=grad_obj,
        prior=self.prior,
        n_start_points=self.n_inits,
        maxiter=self.max_opt_iters,
        random_state=self.random_state)

    # Create n copies of the minimum
    x = np.tile(xhat, (n, 1))

    # Add noise for more efficient fitting of GP
    x = self._add_noise(x)

    return x
def _acq(self, pending_locations, t):
    phis = []
    if pending_locations is not None:
        for pl in pending_locations:
            mean, var = self.model.predict(pl, noiseless=True)
            mean = -1.0 * float(mean)  # maximization
            var = float(var)
            phis.append((pl, mean, var))

    def trans(x, t):
        # negation as the GPLCA formulation is for maximization
        return self.g(-1.0 * float(self.a.evaluate(x, t)))

    def pend(x):
        # Product of penalty terms, one per pending location
        val = 1.0
        for pl, mean, var in phis:
            val *= self.p(x, pl, self.L, self.M, mean, var)
        return val

    def obj(x, t):
        # negation as we use a minimizer to solve a maximization problem
        return -1.0 * trans(x, t) * pend(x)

    loc, _ = minimize(partial(obj, t=t), self.model.bounds,
                      random_state=self.a.random_state, maxiter=500)

    # Debug printout of the final acquisition surface
    self._debug_print("Final surface (M={:.2f}, L={})".format(
        self.M, "".join("{:.2f} ".format(l) for l in self.L)),
        partial(obj, t=t), loc=loc)

    return loc
def acquire(self, n, t=None):
    """Acquire a batch of acquisition points.

    Parameters
    ----------
    n : int
        Number of acquisitions.
    t : int, optional
        Current iteration (unused).

    Returns
    -------
    array_like
        Coordinates of the yielded acquisition points.

    """
    logger.debug('Acquiring the next batch of %d values', n)
    gp = self.model

    # Updating the ABC threshold.
    self.eps = np.percentile(gp.Y, self.quantile_eps * 100)

    def _negate_eval(theta):
        return -self.evaluate(theta)

    def _negate_eval_grad(theta):
        return -self.evaluate_gradient(theta)

    # Obtaining the location where the variance is maximised.
    theta_max, _ = minimize(_negate_eval,
                            gp.bounds,
                            _negate_eval_grad,
                            self.prior,
                            self.n_inits,
                            self.max_opt_iters,
                            random_state=self.random_state)

    # Using the same location for all points in the batch.
    batch_theta = np.tile(theta_max, (n, 1))

    return batch_theta
def test_BOLFI():
    m, true_params = setup_ma2_with_informative_data()

    # Log discrepancy tends to work better
    log_d = NodeReference(m['d'], state=dict(_operation=np.log), model=m, name='log_d')

    bolfi = elfi.BOLFI(log_d, initial_evidence=20, update_interval=10, batch_size=5,
                       bounds={'t1': (-2, 2), 't2': (-1, 1)}, acq_noise_var=.1)
    n = 300
    res = bolfi.infer(n)
    assert bolfi.target_model.n_evidence == n
    acq_x = bolfi.target_model._gp.X

    # check_inference_with_informative_data(res, 1, true_params, error_bound=.2)
    assert np.abs(res.x_min['t1'] - true_params['t1']) < 0.2
    assert np.abs(res.x_min['t2'] - true_params['t2']) < 0.2

    # Test that you can continue the inference where we left off
    res = bolfi.infer(n + 10)
    assert bolfi.target_model.n_evidence == n + 10
    assert np.array_equal(bolfi.target_model._gp.X[:n, :], acq_x)

    post = bolfi.extract_posterior()

    # TODO: make cleaner.
    post_ml = minimize(post._neg_unnormalized_loglikelihood,
                       post.model.bounds,
                       post._gradient_neg_unnormalized_loglikelihood,
                       post.prior, post.n_inits, post.max_opt_iters,
                       random_state=post.random_state)[0]

    # TODO: Here we cannot use the minimize method due to sharp edges in the posterior.
    # If a MAP method is implemented, one must be able to set the optimizer and
    # provide its options.
    post_map = stochastic_optimization(post._neg_unnormalized_logposterior,
                                       post.model.bounds)[0]

    vals_ml = dict(t1=np.array([post_ml[0]]), t2=np.array([post_ml[1]]))
    check_inference_with_informative_data(vals_ml, 1, true_params, error_bound=.2)

    vals_map = dict(t1=np.array([post_map[0]]), t2=np.array([post_map[1]]))
    check_inference_with_informative_data(vals_map, 1, true_params, error_bound=.2)

    n_samples = 400
    n_chains = 4
    res_sampling = bolfi.sample(n_samples, n_chains=n_chains)
    check_inference_with_informative_data(res_sampling.samples,
                                          n_samples // 2 * n_chains,
                                          true_params, error_bound=.2)

    # check the cached predictions for RBF
    x = np.random.random((1, len(true_params)))
    bolfi.target_model.is_sampling = True

    pred_mu, pred_var = bolfi.target_model._gp.predict(x)
    pred_cached_mu, pred_cached_var = bolfi.target_model.predict(x)
    assert np.allclose(pred_mu, pred_cached_mu)
    assert np.allclose(pred_var, pred_cached_var)

    grad_mu, grad_var = bolfi.target_model._gp.predictive_gradients(x)
    grad_cached_mu, grad_cached_var = bolfi.target_model.predictive_gradients(x)
    assert np.allclose(grad_mu[:, :, 0], grad_cached_mu)
    assert np.allclose(grad_var, grad_cached_var)

    # test calculation of prior logpdfs
    true_logpdf_prior = ma2.CustomPrior1.logpdf(x[0, 0], 2)
    true_logpdf_prior += ma2.CustomPrior2.logpdf(x[0, 1], x[0, 0], 1)

    assert np.isclose(true_logpdf_prior, post.prior.logpdf(x[0, :]))
def acquire(self, n, t):
    """Acquire a batch of acquisition points.

    Parameters
    ----------
    n : int
        Number of acquisitions.
    t : int
        Current iteration.

    Returns
    -------
    array_like
        Coordinates of the yielded acquisition points.

    """
    logger.debug('Acquiring the next batch of %d values', n)
    gp = self.model
    self.sigma2_n = gp.noise

    # Updating the discrepancy threshold.
    self.eps = np.percentile(gp.Y, self.quantile_eps * 100)

    # Performing the importance sampling step every self._iter_imp iterations.
    if self._integration == 'importance' and t % self._iter_imp == 0:
        self.points_int = self.density_is.acquire(self._n_samples_imp)

    # Obtaining the omegas_int and priors_int terms to be used in the evaluate function.
    self.mean_int, self.var_int = gp.predict(self.points_int, noiseless=True)
    self.priors_int = (self.prior.pdf(self.points_int)**2)[np.newaxis, :]
    if self._integration == 'importance' and t % self._iter_imp == 0:
        omegas_int_unnormalised = (1 / MaxVar.evaluate(self, self.points_int)).T
        self.omegas_int = omegas_int_unnormalised / \
            np.sum(omegas_int_unnormalised, axis=1)[:, np.newaxis]
    elif self._integration == 'grid':
        self.omegas_int = np.empty(len(self.points_int))
        self.omegas_int.fill(1 / len(self.points_int))

    # Initialising the attributes used in the evaluate function.
    self.thetas_old = np.array(gp.X)
    self._K = gp._gp.kern.K
    self.K = self._K(self.thetas_old, self.thetas_old) + \
        self.sigma2_n * np.identity(self.thetas_old.shape[0])
    self.k_int_old = self._K(self.points_int, self.thetas_old).T
    self.phi_int = ss.norm.cdf(self.eps, loc=self.mean_int.T,
                               scale=np.sqrt(self.sigma2_n + self.var_int.T))

    # Obtaining the location where the expected loss is minimised.
    # Note: The gradient is computed numerically as GPy currently does not
    # directly provide the derivative computations used in Järvenpää et al., 2017.
    theta_min, _ = minimize(self.evaluate,
                            gp.bounds,
                            grad=None,
                            prior=self.prior,
                            n_start_points=self.n_inits,
                            maxiter=self.max_opt_iters,
                            random_state=self.random_state)

    # Using the same location for all points in the batch.
    batch_theta = np.tile(theta_min, (n, 1))

    return batch_theta
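The two integration modes above differ only in how the quadrature weights `omegas_int` are formed: uniform weights over a fixed grid, or self-normalised weights proportional to 1/MaxVar at importance-sampled points. A toy illustration of the normalisation step, with made-up acquisition values:

import numpy as np

# Hypothetical acquisition (MaxVar) values at four integration points.
acq_values = np.array([[0.5, 0.25, 1.0, 0.25]])

# Self-normalised importance weights: proportional to 1/acquisition,
# normalised so that each row sums to one.
omegas_unnormalised = 1 / acq_values
omegas = omegas_unnormalised / np.sum(omegas_unnormalised, axis=1)[:, np.newaxis]
print(omegas)  # [[0.1818 0.3636 0.0909 0.3636]] (approximately); rows sum to 1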